kugelaudio 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -6,49 +6,192 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
6
6
  });
7
7
 
8
8
  // src/errors.ts
9
/**
 * Machine-readable error identifiers. The classifier helpers in this module
 * compare them against the `error_code` field of server responses.
 * Frozen so a consumer cannot accidentally rewrite a shared lookup table.
 */
var ErrorCodes = Object.freeze({
  UNAUTHORIZED: "UNAUTHORIZED",
  RATE_LIMITED: "RATE_LIMITED",
  INSUFFICIENT_CREDITS: "INSUFFICIENT_CREDITS",
  MODEL_UNAVAILABLE: "MODEL_UNAVAILABLE",
  EMPTY_AUDIO: "EMPTY_AUDIO",
  VALIDATION: "VALIDATION_ERROR",
  INTERNAL: "INTERNAL_ERROR",
  NOT_FOUND: "NOT_FOUND"
});
/**
 * Application-level WebSocket close codes that are mapped to typed errors.
 */
var WsCloseCodes = Object.freeze({
  UNAUTHORIZED: 4001,
  INSUFFICIENT_CREDITS: 4003,
  RATE_LIMITED: 4029,
  MODEL_UNAVAILABLE: 4500
});
// Links embedded in the default authentication / billing error messages.
var API_KEYS_URL = "https://app.kugelaudio.com/settings/api-keys";
var BILLING_URL = "https://app.kugelaudio.com/billing";
9
27
  var KugelAudioError = class _KugelAudioError extends Error {
10
- constructor(message, statusCode) {
11
- super(message);
28
+ constructor(message, options = {}) {
29
+ super(options.requestId ? `${message} (request_id: ${options.requestId})` : message);
12
30
  this.name = "KugelAudioError";
13
- this.statusCode = statusCode;
31
+ this.statusCode = options.statusCode;
32
+ this.errorCode = options.errorCode;
33
+ this.requestId = options.requestId;
34
+ this.retryAfter = options.retryAfter;
14
35
  Object.setPrototypeOf(this, _KugelAudioError.prototype);
15
36
  }
16
37
  };
17
38
  var AuthenticationError = class _AuthenticationError extends KugelAudioError {
18
- constructor(message = "Authentication failed") {
19
- super(message, 401);
39
+ constructor(message, options = {}) {
40
+ super(
41
+ message ?? `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`,
42
+ { statusCode: 401, errorCode: ErrorCodes.UNAUTHORIZED, ...options }
43
+ );
20
44
  this.name = "AuthenticationError";
21
45
  Object.setPrototypeOf(this, _AuthenticationError.prototype);
22
46
  }
23
47
  };
24
48
  var RateLimitError = class _RateLimitError extends KugelAudioError {
25
- constructor(message = "Rate limit exceeded") {
26
- super(message, 429);
49
+ constructor(message, options = {}) {
50
+ const msg = message ?? (options.retryAfter ? `KugelAudio rate limit hit; retry after ${options.retryAfter}s.` : "KugelAudio rate limit hit; retry shortly.");
51
+ super(msg, { statusCode: 429, errorCode: ErrorCodes.RATE_LIMITED, ...options });
27
52
  this.name = "RateLimitError";
28
53
  Object.setPrototypeOf(this, _RateLimitError.prototype);
29
54
  }
30
55
  };
31
56
  var InsufficientCreditsError = class _InsufficientCreditsError extends KugelAudioError {
32
- constructor(message = "Insufficient credits") {
33
- super(message, 403);
57
+ constructor(message, options = {}) {
58
+ super(
59
+ message ?? `Your KugelAudio account is out of credits. Top up at ${BILLING_URL}.`,
60
+ { statusCode: 402, errorCode: ErrorCodes.INSUFFICIENT_CREDITS, ...options }
61
+ );
34
62
  this.name = "InsufficientCreditsError";
35
63
  Object.setPrototypeOf(this, _InsufficientCreditsError.prototype);
36
64
  }
37
65
  };
38
66
  var ValidationError = class _ValidationError extends KugelAudioError {
39
- constructor(message) {
40
- super(message, 400);
67
+ constructor(message, options = {}) {
68
+ super(message, { statusCode: 400, errorCode: ErrorCodes.VALIDATION, ...options });
41
69
  this.name = "ValidationError";
42
70
  Object.setPrototypeOf(this, _ValidationError.prototype);
43
71
  }
44
72
  };
45
73
  var ConnectionError = class _ConnectionError extends KugelAudioError {
46
- constructor(message = "Failed to connect to server") {
47
- super(message, 503);
74
+ constructor(message, options = {}) {
75
+ super(message, { statusCode: 503, ...options });
48
76
  this.name = "ConnectionError";
49
77
  Object.setPrototypeOf(this, _ConnectionError.prototype);
50
78
  }
51
79
  };
80
/**
 * Create the most specific typed error for a status / error-code pair.
 *
 * The server-supplied `errorCode` is consulted first because it is the more
 * specific signal; the HTTP status is only a fallback when the code is
 * missing or unknown.
 *
 * @param status - HTTP status, or undefined (e.g. for WebSocket frames).
 * @param errorCode - Value of the server's `error_code` field, if any.
 * @param message - Server message; an empty string selects each class's default.
 * @param opts - Extra metadata (`requestId`, `retryAfter`, …) copied onto the error.
 * @returns A KugelAudioError or one of its subclasses.
 */
function build(status, errorCode, message, opts = {}) {
  const common = { ...opts };
  // Only set keys that are defined so the subclass defaults are not clobbered.
  if (status !== void 0) common.statusCode = status;
  if (errorCode !== void 0) common.errorCode = errorCode;
  const kindByCode = new Map([
    [ErrorCodes.UNAUTHORIZED, "auth"],
    [ErrorCodes.INSUFFICIENT_CREDITS, "credits"],
    [ErrorCodes.RATE_LIMITED, "rate"],
    [ErrorCodes.VALIDATION, "validation"],
    [ErrorCodes.MODEL_UNAVAILABLE, "unavailable"]
  ]);
  const kindByStatus = new Map([
    [401, "auth"],
    [402, "credits"],
    [429, "rate"],
    [400, "validation"],
    [503, "unavailable"]
  ]);
  const kind = kindByCode.get(errorCode) ?? kindByStatus.get(status);
  switch (kind) {
    case "auth":
      return new AuthenticationError(message || void 0, common);
    case "credits":
      return new InsufficientCreditsError(message || void 0, common);
    case "rate":
      return new RateLimitError(message || void 0, common);
    case "validation":
      return new ValidationError(message || "Request validation failed.", common);
    case "unavailable": {
      const detail = message || "service temporarily unavailable";
      return new ConnectionError(
        `KugelAudio is temporarily unavailable: ${detail}. Retry shortly.`,
        common
      );
    }
    default: {
      // Avoid rendering "HTTP undefined" when no status is known.
      const fallback = status !== void 0 ? `HTTP ${status}` : "KugelAudio request failed.";
      return new KugelAudioError(message || fallback, common);
    }
  }
}
105
/**
 * Read one header from either an object exposing `get(name)` (such as a
 * fetch `Headers` instance) or a plain key/value record.
 *
 * @param headers - Header container; may be null or undefined.
 * @param name - Header name. For plain records the lookup is case-insensitive.
 * @returns The header value, or undefined when absent.
 */
function readHeader(headers, name) {
  if (headers === null || headers === void 0) return void 0;
  if (typeof headers.get === "function") {
    return headers.get(name) ?? void 0;
  }
  const wanted = name.toLowerCase();
  // Fast path: exact and lower-case keys, as before.
  const direct = headers[name] ?? headers[wanted];
  if (direct !== null && direct !== void 0) return direct;
  // Slow path: records that keep another casing (e.g. "X-Request-Id").
  for (const key of Object.keys(headers)) {
    if (key.toLowerCase() === wanted) return headers[key] ?? void 0;
  }
  return void 0;
}
112
/**
 * Turn a failed HTTP response into a typed error.
 *
 * @param status - HTTP status code of the response.
 * @param bodyText - Raw response body; parsed as JSON when possible.
 * @param headers - Response headers (`get(name)` object or plain record); may be absent.
 * @returns The error produced by `build()`, carrying `requestId` and
 *   `retryAfter` (seconds) when they can be determined.
 */
function classifyHttpError(status, bodyText, headers) {
  let errorCode;
  let message = "";
  let retryAfter;
  // Tolerate a missing header container instead of throwing.
  const header = (name) => headers === null || headers === void 0 ? void 0 : readHeader(headers, name);
  // Array items may be objects (presumably validation entries with a `msg`
  // field — verify against the server); String() would give "[object Object]".
  const describe = (item) => {
    if (item !== null && typeof item === "object") {
      return typeof item.msg === "string" ? item.msg : JSON.stringify(item);
    }
    return String(item);
  };
  if (bodyText) {
    try {
      const body = JSON.parse(bodyText);
      if (body && typeof body === "object") {
        errorCode = typeof body.error_code === "string" ? body.error_code : void 0;
        const msg = body.error ?? body.detail;
        if (Array.isArray(msg)) {
          message = msg.map(describe).join("; ");
        } else if (typeof msg === "string") {
          message = msg;
        }
        if (typeof body.retry_after === "number") {
          retryAfter = body.retry_after;
        }
      }
    } catch {
      // Body is not JSON; the raw text is used as the message below.
    }
  }
  if (retryAfter === void 0) {
    const raw = header("Retry-After") ?? header("retry-after");
    if (raw) {
      const seconds = Number(raw);
      if (Number.isFinite(seconds)) {
        retryAfter = seconds;
      } else {
        // Retry-After may also be an HTTP-date; convert it to a delay in seconds.
        const at = Date.parse(raw);
        if (Number.isFinite(at)) {
          retryAfter = Math.max(0, Math.ceil((at - Date.now()) / 1e3));
        }
      }
    }
  }
  const requestId = header("x-request-id") ?? header("X-Request-Id");
  if (!message) {
    message = (bodyText || "").trim();
  }
  return build(status, errorCode, message, { requestId, retryAfter });
}
147
/**
 * Convert an error frame received over the WebSocket into a typed error.
 * Reads `error_code`, `error` and a numeric `retry_after` from the frame;
 * no HTTP status is involved.
 */
function classifyWsFrame(data) {
  const { error_code: code, error: serverText, retry_after: rawRetry } = data;
  const text = serverText ?? "Server reported an error.";
  const retryAfter = typeof rawRetry === "number" ? rawRetry : void 0;
  return build(void 0, code, text, { retryAfter });
}
153
/**
 * Map a WebSocket close code (plus optional reason text) to a typed error.
 * Codes outside `WsCloseCodes` yield a generic ConnectionError that quotes
 * the reason and the code.
 */
function classifyWsClose(code, reason) {
  const why = (reason ?? "").trim();
  switch (code) {
    case WsCloseCodes.UNAUTHORIZED: {
      const base = `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
      return new AuthenticationError(why ? `${base} (${why})` : base);
    }
    case WsCloseCodes.INSUFFICIENT_CREDITS:
      return new InsufficientCreditsError();
    case WsCloseCodes.RATE_LIMITED:
      return new RateLimitError();
    case WsCloseCodes.MODEL_UNAVAILABLE: {
      const suffix = why ? ` (${why})` : "";
      return new ConnectionError(
        `KugelAudio model is temporarily unavailable. Retry shortly.${suffix}`
      );
    }
    default: {
      const detail = why || "no reason given";
      const codeStr = code !== void 0 ? ` (code ${code})` : "";
      return new ConnectionError(
        `KugelAudio WebSocket closed by server: ${detail}${codeStr}.`
      );
    }
  }
}
178
/**
 * Try to derive a typed error from a WebSocket handshake failure.
 *
 * The status comes from a numeric `statusCode` property or, failing that,
 * from an "Unexpected server response: NNN" message. Returns null when no
 * status can be determined. Status 403 is reported as an AuthenticationError.
 */
function classifyWsHandshakeError(err) {
  if (!err || typeof err !== "object") return null;
  let status = typeof err.statusCode === "number" ? err.statusCode : void 0;
  const text = typeof err.message === "string" ? err.message : void 0;
  if (status === void 0 && text !== void 0) {
    const found = /Unexpected server response:\s*(\d{3})/i.exec(text);
    if (found) status = Number(found[1]);
  }
  if (status === void 0) return null;
  return status === 403 ? new AuthenticationError() : build(status, void 0, text ?? "");
}
52
195
 
53
196
  // src/utils.ts
54
197
  function base64ToArrayBuffer(base64) {
@@ -108,33 +251,61 @@ function createWavBlob(audio, sampleRate) {
108
251
 
109
252
  // src/websocket.ts
110
253
  var _cachedWs = null;
254
/**
 * Report whether `process.versions.node` is present, i.e. the code is
 * running under Node.js.
 */
function isNodeJs() {
  if (typeof process === "undefined") return false;
  const versions = process.versions;
  return !!versions && typeof versions.node === "string";
}
111
257
  function getWebSocket() {
112
258
  if (_cachedWs) return _cachedWs;
259
+ if (isNodeJs()) {
260
+ try {
261
+ const _require = typeof __require !== "undefined" ? __require : Function('return typeof require !== "undefined" ? require : undefined')();
262
+ if (_require) {
263
+ const ws = _require("ws");
264
+ _cachedWs = ws.default || ws;
265
+ return _cachedWs;
266
+ }
267
+ } catch {
268
+ }
269
+ }
113
270
  if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
114
271
  _cachedWs = globalThis.WebSocket;
115
272
  return _cachedWs;
116
273
  }
117
- try {
118
- const _require = typeof __require !== "undefined" ? __require : Function('return typeof require !== "undefined" ? require : undefined')();
119
- if (_require) {
120
- const ws = _require("ws");
121
- _cachedWs = ws.default || ws;
122
- return _cachedWs;
123
- }
124
- } catch {
125
- }
126
274
  throw new Error(
127
275
  'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
128
276
  );
129
277
  }
130
278
 
131
279
  // src/client.ts
132
- var DEFAULT_API_URL = "https://api.kugelaudio.com";
280
// Base API URL per region.
var REGION_URLS = {
  eu: "https://api.kugelaudio.com",
  us: "https://us-api.kugelaudio.com",
  global: "https://global-api.kugelaudio.com"
};
// API-key prefixes ("eu-", "us-", "global-"), one per region above.
var REGION_PREFIXES = Object.keys(REGION_URLS).map((region) => `${region}-`);
286
/**
 * Split an optional region prefix off an API key.
 *
 * @returns `{ cleanKey, detectedRegion }` when the key starts with one of
 *   `REGION_PREFIXES`, otherwise `{ cleanKey }` with the key unchanged.
 */
function parseApiKey(apiKey) {
  const matched = REGION_PREFIXES.find((candidate) => apiKey.startsWith(candidate));
  if (matched === void 0) {
    return { cleanKey: apiKey };
  }
  const cleanKey = apiKey.slice(matched.length);
  // Drop the trailing "-" to get the region name.
  const detectedRegion = matched.slice(0, -1);
  return { cleanKey, detectedRegion };
}
133
294
  function createWs(url) {
134
295
  const WS = getWebSocket();
135
296
  return new WS(url);
136
297
  }
137
298
  var WS_OPEN = 1;
299
// Ensures the latency hint below is printed at most once.
var _languageWarningLogged = false;
/**
 * Emit a one-time console warning when no `language` is given while
 * normalization is enabled (`normalize` undefined or truthy).
 */
function warnIfNoLanguage(language, normalize) {
  if (_languageWarningLogged || language) return;
  const normDisabled = normalize !== void 0 && !normalize;
  if (normDisabled) return;
  _languageWarningLogged = true;
  console.warn(
    "[KugelAudio] No 'language' set with normalization enabled \u2014 the server will auto-detect the language, adding ~60-150ms to TTFA. Set language (e.g., language: 'en') for optimal latency."
  );
}
138
309
  var ModelsResource = class {
139
310
  constructor(client) {
140
311
  this.client = client;
@@ -168,42 +339,177 @@ var VoicesResource = class {
168
339
  params.set("include_public", String(options.includePublic));
169
340
  }
170
341
  if (options?.limit) params.set("limit", String(options.limit));
342
+ if (options?.offset) params.set("offset", String(options.offset));
171
343
  const query = params.toString();
172
344
  const path = query ? `/v1/voices?${query}` : "/v1/voices";
173
345
  const response = await this.client.request("GET", path);
174
- return response.voices.map((v) => ({
175
- id: v.id,
176
- name: v.name,
177
- description: v.description,
178
- category: v.category,
179
- sex: v.sex,
180
- age: v.age,
181
- supportedLanguages: v.supported_languages || [],
182
- sampleText: v.sample_text,
183
- avatarUrl: v.avatar_url,
184
- sampleUrl: v.sample_url,
185
- isPublic: v.is_public || false,
186
- verified: v.verified || false
187
- }));
346
+ return {
347
+ voices: response.voices.map((v) => ({
348
+ id: v.id,
349
+ name: v.name,
350
+ description: v.description,
351
+ category: v.category,
352
+ sex: v.sex,
353
+ age: v.age,
354
+ supportedLanguages: v.supported_languages || [],
355
+ sampleText: v.sample_text,
356
+ avatarUrl: v.avatar_url,
357
+ sampleUrl: v.sample_url,
358
+ isPublic: v.is_public || false,
359
+ verified: v.verified || false
360
+ })),
361
+ total: response.total,
362
+ limit: response.limit,
363
+ offset: response.offset
364
+ };
188
365
  }
189
366
  /**
190
367
  * Get a specific voice by ID.
191
368
  */
192
369
  async get(voiceId) {
193
370
  const v = await this.client.request("GET", `/v1/voices/${voiceId}`);
371
+ return this.mapVoiceDetail(v);
372
+ }
373
+ /**
374
+ * Create a new voice.
375
+ */
376
+ async create(options) {
377
+ const metadata = {
378
+ name: options.name,
379
+ sex: options.sex,
380
+ description: options.description ?? "",
381
+ category: options.category ?? "conversational",
382
+ age: options.age ?? "middle_age",
383
+ quality: options.quality ?? "mid",
384
+ supported_languages: options.supportedLanguages ?? ["en"],
385
+ is_public: options.isPublic ?? false,
386
+ sample_text: options.sampleText ?? ""
387
+ };
388
+ const formData = new FormData();
389
+ formData.append(
390
+ "metadata",
391
+ new Blob([JSON.stringify(metadata)], { type: "application/json" })
392
+ );
393
+ if (options.referenceFiles) {
394
+ for (const file of options.referenceFiles) {
395
+ formData.append("files", file);
396
+ }
397
+ }
398
+ const v = await this.client.requestMultipart("POST", "/v1/voices", formData);
399
+ return this.mapVoiceDetail(v);
400
+ }
401
+ /**
402
+ * Update an existing voice. Only provided fields are updated.
403
+ */
404
+ async update(voiceId, options) {
405
+ const payload = {};
406
+ if (options.name !== void 0) payload.name = options.name;
407
+ if (options.description !== void 0) payload.description = options.description;
408
+ if (options.category !== void 0) payload.category = options.category;
409
+ if (options.age !== void 0) payload.age = options.age;
410
+ if (options.sex !== void 0) payload.sex = options.sex;
411
+ if (options.quality !== void 0) payload.quality = options.quality;
412
+ if (options.supportedLanguages !== void 0) payload.supported_languages = options.supportedLanguages;
413
+ if (options.isPublic !== void 0) payload.is_public = options.isPublic;
414
+ if (options.sampleText !== void 0) payload.sample_text = options.sampleText;
415
+ const v = await this.client.request("PATCH", `/v1/voices/${voiceId}`, payload);
416
+ return this.mapVoiceDetail(v);
417
+ }
418
+ /**
419
+ * Delete a voice.
420
+ */
421
+ async delete(voiceId) {
422
+ await this.client.request("DELETE", `/v1/voices/${voiceId}`);
423
+ }
424
+ // -- Reference management --
425
+ /**
426
+ * List reference audio files for a voice.
427
+ */
428
+ async listReferences(voiceId) {
429
+ const response = await this.client.request(
430
+ "GET",
431
+ `/v1/voices/${voiceId}/references`
432
+ );
433
+ return response.references.map((r) => this.mapVoiceReference(r));
434
+ }
435
+ /**
436
+ * Upload a reference audio file to a voice.
437
+ *
438
+ * @param voiceId - Voice ID
439
+ * @param file - Audio file (File in browser, Blob in Node.js)
440
+ * @param referenceText - Optional transcript of the reference audio
441
+ */
442
+ async addReference(voiceId, file, referenceText) {
443
+ const formData = new FormData();
444
+ formData.append("file", file);
445
+ if (referenceText) {
446
+ formData.append("reference_text", referenceText);
447
+ }
448
+ const r = await this.client.requestMultipart(
449
+ "POST",
450
+ `/v1/voices/${voiceId}/references`,
451
+ formData
452
+ );
453
+ return this.mapVoiceReference(r);
454
+ }
455
+ /**
456
+ * Delete a reference audio file from a voice.
457
+ */
458
+ async deleteReference(voiceId, referenceId) {
459
+ await this.client.request(
460
+ "DELETE",
461
+ `/v1/voices/${voiceId}/references/${referenceId}`
462
+ );
463
+ }
464
+ // -- Publishing --
465
+ /**
466
+ * Request publication of a voice. Sets it as public and marks it
467
+ * as pending verification by an admin.
468
+ */
469
+ async publish(voiceId) {
470
+ const v = await this.client.request("POST", `/v1/voices/${voiceId}/publish`);
471
+ return this.mapVoiceDetail(v);
472
+ }
473
+ // -- Sample generation --
474
+ /**
475
+ * Trigger sample audio generation for a voice.
476
+ */
477
+ async generateSample(voiceId) {
478
+ const v = await this.client.request(
479
+ "POST",
480
+ `/v1/voices/${voiceId}/generate-sample`
481
+ );
482
+ return this.mapVoiceDetail(v);
483
+ }
484
+ // -- Helpers --
485
+ mapVoiceDetail(v) {
194
486
  return {
195
487
  id: v.id,
196
488
  name: v.name,
197
- description: v.description,
198
- category: v.category,
199
- sex: v.sex,
489
+ description: v.description ?? "",
490
+ generativeVoiceDescription: v.generative_voice_description ?? "",
491
+ supportedLanguages: v.supported_languages ?? [],
492
+ category: v.category ?? "cloned",
200
493
  age: v.age,
201
- supportedLanguages: v.supported_languages || [],
202
- sampleText: v.sample_text,
203
- avatarUrl: v.avatar_url,
494
+ sex: v.sex,
495
+ quality: v.quality ?? "mid",
496
+ isPublic: v.is_public ?? false,
497
+ verified: v.verified ?? false,
498
+ pendingVerification: v.pending_verification ?? false,
204
499
  sampleUrl: v.sample_url,
205
- isPublic: v.is_public || false,
206
- verified: v.verified || false
500
+ avatarUrl: v.avatar_url,
501
+ sampleText: v.sample_text ?? ""
502
+ };
503
+ }
504
+ mapVoiceReference(r) {
505
+ return {
506
+ id: r.id,
507
+ voiceId: r.voice_id,
508
+ name: r.name ?? "",
509
+ referenceText: r.reference_text ?? "",
510
+ s3Path: r.s3_path ?? "",
511
+ audioUrl: r.audio_url,
512
+ isGenerated: r.is_generated ?? false
207
513
  };
208
514
  }
209
515
  };
@@ -215,6 +521,7 @@ var TTSResource = class {
215
521
  this.wsUrl = null;
216
522
  this.pendingRequests = /* @__PURE__ */ new Map();
217
523
  this.requestCounter = 0;
524
+ this.keepaliveTimer = null;
218
525
  }
219
526
  /**
220
527
  * Pre-establish WebSocket connection for faster first request.
@@ -249,10 +556,14 @@ var TTSResource = class {
249
556
  async generate(options) {
250
557
  const chunks = [];
251
558
  let finalStats;
559
+ const allTimestamps = [];
252
560
  await this.stream(options, {
253
561
  onChunk: (chunk) => {
254
562
  chunks.push(base64ToArrayBuffer(chunk.audio));
255
563
  },
564
+ onWordTimestamps: (timestamps) => {
565
+ allTimestamps.push(...timestamps);
566
+ },
256
567
  onFinal: (stats) => {
257
568
  finalStats = stats;
258
569
  }
@@ -270,9 +581,67 @@ var TTSResource = class {
270
581
  samples: finalStats ? finalStats.totalSamples : totalLength / 2,
271
582
  durationMs: finalStats ? finalStats.durationMs : 0,
272
583
  generationMs: finalStats ? finalStats.generationMs : 0,
273
- rtf: finalStats ? finalStats.rtf : 0
584
+ rtf: finalStats ? finalStats.rtf : 0,
585
+ wordTimestamps: allTimestamps
274
586
  };
275
587
  }
588
+ /**
589
+ * Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
590
+ *
591
+ * **Node.js only** — this method requires the `stream` built-in module and is
592
+ * intended for server-side integrations such as Vapi custom TTS endpoints,
593
+ * Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
594
+ *
595
+ * Compared to manually wiring `onChunk` to a `Readable`, this method avoids
596
+ * a common race-condition: the stream object is created and returned **before**
597
+ * any chunks arrive, so the caller can safely pipe or attach listeners before
598
+ * the first audio byte is pushed.
599
+ *
600
+ * @example Vapi custom TTS endpoint
601
+ * ```typescript
602
+ * app.post('/synthesize', (req, res) => {
603
+ * res.setHeader('Content-Type', 'audio/pcm');
604
+ * res.setHeader('Transfer-Encoding', 'chunked');
605
+ *
606
+ * const readable = client.tts.toReadable({
607
+ * text: req.body.message.text,
608
+ * modelId: 'kugel-1-turbo',
609
+ * sampleRate: req.body.message.sampleRate,
610
+ * language: 'en',
611
+ * });
612
+ *
613
+ * readable.pipe(res);
614
+ * });
615
+ * ```
616
+ *
617
+ * @param options - TTS generation options (same as `stream()`)
618
+ * @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
619
+ * @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
620
+ */
621
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
622
+ toReadable(options, reuseConnection = true) {
623
+ const { Readable } = __require("stream");
624
+ const readable = new Readable({ read() {
625
+ } });
626
+ this.stream(
627
+ options,
628
+ {
629
+ onChunk: (chunk) => {
630
+ readable.push(Buffer.from(chunk.audio, "base64"));
631
+ },
632
+ onFinal: () => {
633
+ readable.push(null);
634
+ },
635
+ onError: (error) => {
636
+ readable.destroy(error);
637
+ }
638
+ },
639
+ reuseConnection
640
+ ).catch((error) => {
641
+ readable.destroy(error);
642
+ });
643
+ return readable;
644
+ }
276
645
  /**
277
646
  * Build the WebSocket URL with appropriate auth param.
278
647
  */
@@ -314,10 +683,17 @@ var TTSResource = class {
314
683
  this.wsConnection = ws;
315
684
  this.wsUrl = url;
316
685
  this.setupMessageHandler(ws);
686
+ this.startKeepalive(ws);
317
687
  resolve(ws);
318
688
  };
319
- ws.onerror = () => {
320
- reject(new KugelAudioError("WebSocket connection error"));
689
+ ws.onerror = (event) => {
690
+ const underlying = event?.error ?? event;
691
+ const typed = classifyWsHandshakeError(underlying);
692
+ reject(
693
+ typed ?? new ConnectionError(
694
+ `Could not establish KugelAudio WebSocket connection to ${url}. Check network connectivity.`
695
+ )
696
+ );
321
697
  };
322
698
  });
323
699
  }
@@ -332,7 +708,7 @@ var TTSResource = class {
332
708
  const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
333
709
  if (!pending) return;
334
710
  if (data.error) {
335
- const error = this.parseError(data.error);
711
+ const error = this.parseError(data);
336
712
  pending.callbacks.onError?.(error);
337
713
  this.pendingRequests.delete(requestId);
338
714
  pending.reject(error);
@@ -345,7 +721,6 @@ var TTSResource = class {
345
721
  totalSamples: data.total_samples,
346
722
  durationMs: data.dur_ms,
347
723
  generationMs: data.gen_ms,
348
- ttfaMs: data.ttfa_ms,
349
724
  rtf: data.rtf,
350
725
  error: data.error
351
726
  };
@@ -364,25 +739,41 @@ var TTSResource = class {
364
739
  };
365
740
  pending.callbacks.onChunk?.(chunk);
366
741
  }
742
+ if (data.word_timestamps) {
743
+ const timestamps = data.word_timestamps.map(
744
+ (w) => ({
745
+ word: w.word,
746
+ startMs: w.start_ms,
747
+ endMs: w.end_ms,
748
+ charStart: w.char_start,
749
+ charEnd: w.char_end,
750
+ score: w.score ?? 1
751
+ })
752
+ );
753
+ pending.callbacks.onWordTimestamps?.(timestamps);
754
+ }
367
755
  } catch (e) {
368
756
  console.error("Failed to parse WebSocket message:", e);
369
757
  }
370
758
  };
371
759
  ws.onclose = (event) => {
760
+ this.stopKeepalive();
372
761
  this.wsConnection = null;
373
762
  this.wsUrl = null;
374
763
  for (const [id, pending] of this.pendingRequests) {
375
764
  pending.callbacks.onClose?.();
376
- if (event.code === 4001) {
377
- pending.reject(new AuthenticationError("Authentication failed"));
378
- } else if (event.code === 4003) {
379
- pending.reject(new InsufficientCreditsError("Insufficient credits"));
765
+ if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
766
+ const error = classifyWsClose(event.code, event.reason);
767
+ pending.callbacks.onError?.(error);
768
+ pending.reject(error);
380
769
  }
381
770
  this.pendingRequests.delete(id);
382
771
  }
383
772
  };
384
773
  ws.onerror = () => {
385
- const error = new KugelAudioError("WebSocket connection error");
774
+ const error = new ConnectionError(
775
+ "KugelAudio WebSocket connection error. Check network connectivity."
776
+ );
386
777
  for (const [id, pending] of this.pendingRequests) {
387
778
  pending.callbacks.onError?.(error);
388
779
  pending.reject(error);
@@ -408,6 +799,7 @@ var TTSResource = class {
408
799
  * Stream with connection pooling (fast path).
409
800
  */
410
801
  async streamWithPooling(options, callbacks) {
802
+ warnIfNoLanguage(options.language, options.normalize);
411
803
  const ws = await this.getConnection();
412
804
  const requestId = ++this.requestCounter;
413
805
  return new Promise((resolve, reject) => {
@@ -418,10 +810,14 @@ var TTSResource = class {
418
810
  model_id: options.modelId || "kugel-1-turbo",
419
811
  voice_id: options.voiceId,
420
812
  cfg_scale: options.cfgScale ?? 2,
813
+ ...options.temperature !== void 0 && { temperature: options.temperature },
421
814
  max_new_tokens: options.maxNewTokens ?? 2048,
422
815
  sample_rate: options.sampleRate ?? 24e3,
423
816
  normalize: options.normalize ?? true,
424
- ...options.language && { language: options.language }
817
+ ...options.language && { language: options.language },
818
+ ...options.wordTimestamps && { word_timestamps: true },
819
+ ...options.speed !== void 0 && { speed: options.speed },
820
+ ...options.projectId !== void 0 && { project_id: options.projectId }
425
821
  }));
426
822
  });
427
823
  }
@@ -429,6 +825,7 @@ var TTSResource = class {
429
825
  * Stream without connection pooling (original behavior).
430
826
  */
431
827
  streamWithoutPooling(options, callbacks) {
828
+ warnIfNoLanguage(options.language, options.normalize);
432
829
  return new Promise((resolve, reject) => {
433
830
  const url = this.buildWsUrl();
434
831
  const ws = createWs(url);
@@ -442,7 +839,10 @@ var TTSResource = class {
442
839
  max_new_tokens: options.maxNewTokens ?? 2048,
443
840
  sample_rate: options.sampleRate ?? 24e3,
444
841
  normalize: options.normalize ?? true,
445
- ...options.language && { language: options.language }
842
+ ...options.language && { language: options.language },
843
+ ...options.wordTimestamps && { word_timestamps: true },
844
+ ...options.speed !== void 0 && { speed: options.speed },
845
+ ...options.projectId !== void 0 && { project_id: options.projectId }
446
846
  }));
447
847
  };
448
848
  ws.onmessage = (event) => {
@@ -450,7 +850,7 @@ var TTSResource = class {
450
850
  const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
451
851
  const data = JSON.parse(messageData);
452
852
  if (data.error) {
453
- const error = this.parseError(data.error);
853
+ const error = this.parseError(data);
454
854
  callbacks.onError?.(error);
455
855
  ws.close();
456
856
  reject(error);
@@ -463,7 +863,6 @@ var TTSResource = class {
463
863
  totalSamples: data.total_samples,
464
864
  durationMs: data.dur_ms,
465
865
  generationMs: data.gen_ms,
466
- ttfaMs: data.ttfa_ms,
467
866
  rtf: data.rtf,
468
867
  error: data.error
469
868
  };
@@ -482,29 +881,71 @@ var TTSResource = class {
482
881
  };
483
882
  callbacks.onChunk?.(chunk);
484
883
  }
884
+ if (data.word_timestamps) {
885
+ const timestamps = data.word_timestamps.map(
886
+ (w) => ({
887
+ word: w.word,
888
+ startMs: w.start_ms,
889
+ endMs: w.end_ms,
890
+ charStart: w.char_start,
891
+ charEnd: w.char_end,
892
+ score: w.score ?? 1
893
+ })
894
+ );
895
+ callbacks.onWordTimestamps?.(timestamps);
896
+ }
485
897
  } catch (e) {
486
898
  console.error("Failed to parse WebSocket message:", e);
487
899
  }
488
900
  };
489
- ws.onerror = () => {
490
- const error = new KugelAudioError("WebSocket connection error");
901
+ ws.onerror = (event) => {
902
+ const underlying = event?.error ?? event;
903
+ const error = classifyWsHandshakeError(underlying) ?? new ConnectionError(
904
+ "KugelAudio WebSocket connection error. Check network connectivity."
905
+ );
491
906
  callbacks.onError?.(error);
492
907
  reject(error);
493
908
  };
494
909
  ws.onclose = (event) => {
495
910
  callbacks.onClose?.();
496
- if (event.code === 4001) {
497
- reject(new AuthenticationError("Authentication failed"));
498
- } else if (event.code === 4003) {
499
- reject(new InsufficientCreditsError("Insufficient credits"));
911
+ if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
912
+ const error = classifyWsClose(event.code, event.reason);
913
+ callbacks.onError?.(error);
914
+ reject(error);
500
915
  }
501
916
  };
502
917
  });
503
918
  }
919
+ /**
920
+ * Start periodic keepalive pings on the pooled connection.
921
+ * Uses the ws package's ping() in Node.js; silently skips in browsers
922
+ * where WebSocket doesn't expose a ping method.
923
+ */
924
+ startKeepalive(ws) {
925
+ this.stopKeepalive();
926
+ const intervalMs = this.client.keepalivePingInterval;
927
+ if (intervalMs == null || intervalMs <= 0) return;
928
+ this.keepaliveTimer = setInterval(() => {
929
+ if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
930
+ this.stopKeepalive();
931
+ return;
932
+ }
933
+ if (typeof ws.ping === "function") {
934
+ ws.ping();
935
+ }
936
+ }, intervalMs);
937
+ }
938
+ stopKeepalive() {
939
+ if (this.keepaliveTimer !== null) {
940
+ clearInterval(this.keepaliveTimer);
941
+ this.keepaliveTimer = null;
942
+ }
943
+ }
504
944
  /**
505
945
  * Close the pooled WebSocket connection.
506
946
  */
507
947
  close() {
948
+ this.stopKeepalive();
508
949
  if (this.wsConnection) {
509
950
  try {
510
951
  this.wsConnection.close();
@@ -514,15 +955,39 @@ var TTSResource = class {
514
955
  this.wsUrl = null;
515
956
  }
516
957
  }
517
- parseError(message) {
518
- const lower = message.toLowerCase();
519
- if (lower.includes("auth") || lower.includes("unauthorized")) {
520
- return new AuthenticationError(message);
521
- }
522
- if (lower.includes("credit")) {
523
- return new InsufficientCreditsError(message);
524
- }
525
- return new KugelAudioError(message);
958
+ parseError(data) {
959
+ return classifyWsFrame(data);
960
+ }
961
+ /**
962
+ * Create a streaming session for LLM integration.
963
+ *
964
+ * The session connects to `/ws/tts/stream` and keeps a persistent
965
+ * connection across multiple {@link StreamingSession.send} calls.
966
+ * The server auto-chunks text at sentence boundaries — no client-side
967
+ * flushing required.
968
+ *
969
+ * @param config - Session configuration (voice, model, chunking strategy).
970
+ * @param callbacks - Callbacks for audio chunks and session lifecycle events.
971
+ * @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
972
+ *
973
+ * @example
974
+ * ```typescript
975
+ * const session = client.tts.streamingSession(
976
+ * { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
977
+ * { onChunk: (chunk) => playAudio(chunk.audio) },
978
+ * );
979
+ *
980
+ * session.connect();
981
+ *
982
+ * for await (const token of llmStream) {
983
+ * session.send(token);
984
+ * }
985
+ *
986
+ * await session.close();
987
+ * ```
988
+ */
989
+ streamingSession(config, callbacks) {
990
+ return new StreamingSession(this.client, config, callbacks);
526
991
  }
527
992
  /**
528
993
  * Create a multi-context session for concurrent TTS streams.
@@ -542,7 +1007,7 @@ var TTSResource = class {
542
1007
  * console.log(`Audio from ${chunk.contextId}`);
543
1008
  * playAudio(chunk.audio);
544
1009
  * },
545
- * onContextFinal: (contextId) => {
1010
+ * onContextClosed: (contextId) => {
546
1011
  * console.log(`${contextId} finished`);
547
1012
  * },
548
1013
  * });
@@ -581,6 +1046,11 @@ var MultiContextSession = class {
581
1046
  }
582
1047
  /**
583
1048
  * Connect to the multi-context WebSocket endpoint.
1049
+ *
1050
+ * The returned promise resolves once the WebSocket is OPEN so callers can
1051
+ * ``await session.connect(callbacks)`` before invoking
1052
+ * {@link createContext} / {@link send}. Pre-open errors reject with the
1053
+ * typed error.
584
1054
  */
585
1055
  connect(callbacks) {
586
1056
  this.callbacks = callbacks;
@@ -595,9 +1065,8 @@ var MultiContextSession = class {
595
1065
  }
596
1066
  const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
597
1067
  this.ws = createWs(url);
598
- this.ws.onopen = () => {
599
- };
600
- this.ws.onmessage = (event) => {
1068
+ const ws = this.ws;
1069
+ ws.onmessage = (event) => {
601
1070
  try {
602
1071
  const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
603
1072
  const data = JSON.parse(messageData);
@@ -628,9 +1097,6 @@ var MultiContextSession = class {
628
1097
  };
629
1098
  this.callbacks.onChunk?.(chunk);
630
1099
  }
631
- if (data.is_final) {
632
- this.callbacks.onContextFinal?.(data.context_id);
633
- }
634
1100
  if (data.context_closed) {
635
1101
  this.contexts.delete(data.context_id);
636
1102
  this.callbacks.onContextClosed?.(data.context_id);
@@ -646,19 +1112,38 @@ var MultiContextSession = class {
646
1112
  console.error("Failed to parse WebSocket message:", e);
647
1113
  }
648
1114
  };
649
- this.ws.onerror = () => {
650
- this.callbacks.onError?.(new KugelAudioError("WebSocket connection error"));
651
- };
652
- this.ws.onclose = (event) => {
653
- if (event.code === 4001) {
654
- this.callbacks.onError?.(new AuthenticationError("Authentication failed"));
655
- } else if (event.code === 4003) {
656
- this.callbacks.onError?.(new InsufficientCreditsError("Insufficient credits"));
657
- }
658
- this.ws = null;
659
- this.isStarted = false;
660
- this.contexts.clear();
661
- };
1115
+ return new Promise((resolve, reject) => {
1116
+ let opened = false;
1117
+ ws.onopen = () => {
1118
+ opened = true;
1119
+ resolve();
1120
+ };
1121
+ ws.onerror = (event) => {
1122
+ const underlying = event?.error ?? event;
1123
+ const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
1124
+ "KugelAudio multi-context WebSocket connection error. Check network connectivity."
1125
+ );
1126
+ if (!opened) reject(err);
1127
+ this.callbacks.onError?.(err);
1128
+ };
1129
+ ws.onclose = (event) => {
1130
+ let typedErr = null;
1131
+ if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
1132
+ typedErr = classifyWsClose(event.code, event.reason);
1133
+ this.callbacks.onError?.(typedErr);
1134
+ }
1135
+ if (!opened) {
1136
+ reject(
1137
+ typedErr ?? new ConnectionError(
1138
+ `KugelAudio multi-context WebSocket closed before ready (code ${event.code}).`
1139
+ )
1140
+ );
1141
+ }
1142
+ this.ws = null;
1143
+ this.isStarted = false;
1144
+ this.contexts.clear();
1145
+ };
1146
+ });
662
1147
  }
663
1148
  /**
664
1149
  * Create a new context with optional voice settings.
@@ -672,10 +1157,13 @@ var MultiContextSession = class {
672
1157
  context_id: contextId
673
1158
  };
674
1159
  if (!this.isStarted) {
1160
+ warnIfNoLanguage(this.config.language, this.config.normalize);
675
1161
  if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
676
1162
  if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
1163
+ if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
677
1164
  if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
678
1165
  if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
1166
+ if (this.config.language) msg.language = this.config.language;
679
1167
  if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
680
1168
  }
681
1169
  const voiceId = options?.voiceId || this.config.defaultVoiceId;
@@ -762,18 +1250,271 @@ var MultiContextSession = class {
762
1250
  return this.ws !== null && this.ws.readyState === WS_OPEN;
763
1251
  }
764
1252
  };
1253
/**
 * Decode a WebSocket message payload into text.
 *
 * Strings are returned as-is and Node `Buffer` payloads are decoded with
 * `toString()`; anything else is coerced with `String()`. The `Buffer` check
 * is guarded so runtimes without a `Buffer` global do not throw a
 * ReferenceError on non-string frames.
 */
function streamingFrameToText(payload) {
  if (typeof payload === "string") return payload;
  if (typeof Buffer !== "undefined" && payload instanceof Buffer) return payload.toString();
  return String(payload);
}
/**
 * Persistent streaming TTS session over the `/ws/tts/stream` WebSocket.
 *
 * Text is pushed with {@link StreamingSession.send}; server frames are
 * dispatched to the callbacks supplied at construction (`onChunk`,
 * `onWordTimestamps`, `onChunkComplete`, `onGenerationStarted`,
 * `onSessionClosed`, `onError`). The session configuration travels with the
 * first `send()` after a connect, `endSession()` or `updateConfig()`.
 */
var StreamingSession = class {
  /**
   * @param client - Client object providing `ttsUrl`, `apiKey`, `isToken` and `isMasterKey`.
   * @param config - Session configuration. A shallow copy is stored so that
   *   {@link updateConfig} never mutates the caller's object.
   * @param callbacks - Event callbacks; every callback is optional.
   */
  constructor(client, config, callbacks) {
    this.ws = null;
    this.configSent = false;
    // In-flight endSession() promise, shared by overlapping callers.
    this.endSessionPromise = null;
    this.client = client;
    this.config = { ...config };
    this.callbacks = callbacks ?? {};
  }
  /**
   * Open the WebSocket connection and authenticate.
   *
   * The returned promise resolves once the WebSocket is OPEN, so callers can
   * `await session.connect()` and then `send()` without racing the handshake.
   * Errors and closes that happen before the socket opens reject the promise;
   * errors are also reported through `callbacks.onError`.
   *
   * @returns Promise that resolves when the socket is open.
   */
  connect() {
    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam;
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    } else {
      authParam = "api_key";
    }
    // Percent-encode the credential: characters such as "+", "/" or "=" would
    // otherwise be misread when the query string is parsed.
    const url = `${wsUrl}/ws/tts/stream?${authParam}=${encodeURIComponent(this.client.apiKey)}`;
    const ws = createWs(url);
    this.ws = ws;
    // A fresh socket has not received the configuration yet.
    this.configSent = false;
    ws.onmessage = (event) => {
      try {
        const data = JSON.parse(streamingFrameToText(event.data));
        if (data.error) {
          this.callbacks.onError?.(new KugelAudioError(data.error));
          return;
        }
        if (data.audio) {
          const chunk = {
            audio: data.audio,
            encoding: data.enc || "pcm_s16le",
            index: data.idx,
            sampleRate: data.sr,
            samples: data.samples
          };
          this.callbacks.onChunk?.(chunk);
        }
        if (data.word_timestamps) {
          const timestamps = data.word_timestamps.map((w) => ({
            word: w.word,
            startMs: w.start_ms,
            endMs: w.end_ms,
            charStart: w.char_start,
            charEnd: w.char_end,
            score: w.score ?? 1
          }));
          this.callbacks.onWordTimestamps?.(timestamps);
        }
        if (data.chunk_complete) {
          this.callbacks.onChunkComplete?.(
            data.chunk_id ?? 0,
            data.audio_seconds ?? 0,
            data.gen_ms ?? 0
          );
        }
        if (data.generation_started) {
          this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
        }
        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(
            data.total_audio_seconds ?? 0,
            data.total_text_chunks ?? 0,
            data.total_audio_chunks ?? 0
          );
        }
      } catch (e) {
        // Also reached when a user callback throws; the frame is dropped.
        console.error("[KugelAudio] Failed to parse streaming session message:", e);
      }
    };
    return new Promise((resolve, reject) => {
      let opened = false;
      ws.onopen = () => {
        opened = true;
        resolve();
      };
      ws.onerror = (event) => {
        const underlying = event?.error ?? event;
        const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
          "KugelAudio streaming WebSocket connection error. Check network connectivity."
        );
        if (!opened) reject(err);
        this.callbacks.onError?.(err);
      };
      ws.onclose = (event) => {
        let typedErr = null;
        if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
          typedErr = classifyWsClose(event.code, event.reason);
          this.callbacks.onError?.(typedErr);
        }
        if (!opened) {
          reject(
            typedErr ?? new ConnectionError(
              `KugelAudio streaming WebSocket closed before ready (code ${event.code}).`
            )
          );
        }
        // Only clear state that still belongs to this socket. A late close
        // event from a previous socket must not tear down a newer connection.
        if (this.ws === ws) {
          this.ws = null;
          this.configSent = false;
        }
      };
    });
  }
  /**
   * Send a text chunk to the server (e.g. one LLM output token).
   *
   * The first call after a connect, `endSession()` or `updateConfig()` also
   * carries the session configuration fields that are set in `config`.
   *
   * @param text - Raw text or LLM token to send.
   * @param flush - Sent as the `flush` flag of the message to request
   *   immediate generation of whatever is buffered. Defaults to `false`.
   * @throws KugelAudioError if the WebSocket is not open.
   */
  send(text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("StreamingSession not connected. Call connect() first.");
    }
    const msg = { text, flush };
    if (!this.configSent) {
      if (this.config.voiceId !== void 0) msg.voice_id = this.config.voiceId;
      if (this.config.modelId !== void 0) msg.model_id = this.config.modelId;
      if (this.config.cfgScale !== void 0) msg.cfg_scale = this.config.cfgScale;
      if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
      if (this.config.maxNewTokens !== void 0) msg.max_new_tokens = this.config.maxNewTokens;
      if (this.config.sampleRate !== void 0) msg.sample_rate = this.config.sampleRate;
      if (this.config.flushTimeoutMs !== void 0) msg.flush_timeout_ms = this.config.flushTimeoutMs;
      if (this.config.maxBufferLength !== void 0) msg.max_buffer_length = this.config.maxBufferLength;
      if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
      if (this.config.language !== void 0) msg.language = this.config.language;
      if (this.config.wordTimestamps) msg.word_timestamps = true;
      if (this.config.autoMode !== void 0) msg.auto_mode = this.config.autoMode;
      if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
      if (this.config.speed !== void 0) msg.speed = this.config.speed;
      this.configSent = true;
    }
    this.ws.send(JSON.stringify(msg));
  }
  /**
   * End the current session but keep the WebSocket connection open.
   *
   * Sends `{ close: true }` and resolves once a `session_closed` frame
   * arrives, the socket closes, or 15 s pass without any incoming frame (the
   * timer is re-armed on every frame). Frames received while waiting are still
   * delivered to the regular message handler. Afterwards the configuration is
   * re-sent with the next {@link send}.
   *
   * Overlapping calls share a single in-flight promise, so only one close
   * frame is sent and the temporary handlers are installed once.
   *
   * @returns Promise that resolves when the session has ended; resolves
   *   immediately if the socket is not open. Rejects if sending the close
   *   frame throws.
   */
  endSession() {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
    if (this.endSessionPromise) return this.endSessionPromise;
    const ws = this.ws;
    const QUIET_TIMEOUT_MS = 15e3;
    const pending = new Promise((resolve, reject) => {
      let settled = false;
      let timer;
      const prevMessage = ws.onmessage;
      const prevClose = ws.onclose;
      // Put the regular handlers back and stop the quiet timer.
      const restore = () => {
        clearTimeout(timer);
        ws.onmessage = prevMessage;
        ws.onclose = prevClose;
        this.configSent = false;
      };
      const done = () => {
        if (settled) return;
        settled = true;
        restore();
        resolve();
      };
      const armQuietTimer = () => {
        clearTimeout(timer);
        timer = setTimeout(done, QUIET_TIMEOUT_MS);
      };
      armQuietTimer();
      ws.onmessage = (event) => {
        armQuietTimer();
        if (prevMessage) prevMessage.call(ws, event);
        try {
          if (JSON.parse(streamingFrameToText(event.data)).session_closed) done();
        } catch {
          // Non-JSON frame: the regular handler has already logged it.
        }
      };
      ws.onclose = (event) => {
        if (this.ws === ws) this.ws = null;
        if (prevClose) prevClose.call(ws, event);
        done();
      };
      try {
        ws.send(JSON.stringify({ close: true }));
      } catch (err) {
        // Do not leave the temporary handlers or the timer behind.
        settled = true;
        restore();
        reject(err);
      }
    });
    this.endSessionPromise = pending;
    const release = () => {
      if (this.endSessionPromise === pending) this.endSessionPromise = null;
    };
    pending.then(release, release);
    return pending;
  }
  /**
   * Update session configuration for the next session.
   *
   * Merges `config` into the stored configuration and marks it as unsent so
   * the next {@link send} transmits it.
   *
   * @param config - Partial configuration to merge.
   */
  updateConfig(config) {
    Object.assign(this.config, config);
    this.configSent = false;
  }
  /**
   * Close the session and the WebSocket connection.
   *
   * Waits for {@link endSession} and then closes the socket. The socket is
   * released even when ending the session fails; that error still propagates
   * to the caller.
   */
  async close() {
    try {
      await this.endSession();
    } finally {
      const ws = this.ws;
      if (ws) {
        try {
          ws.close();
        } catch {
          // Best effort: the socket may already be closing or closed.
        }
        this.ws = null;
        this.configSent = false;
      }
    }
  }
  /** Whether the underlying WebSocket is open. */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
765
1492
  var KugelAudio = class _KugelAudio {
766
1493
  constructor(options) {
767
1494
  if (!options.apiKey) {
768
- throw new Error("API key is required");
1495
+ throw new ValidationError(
1496
+ "KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY environment variable or pass { apiKey: ... } to the client. Get a key at https://app.kugelaudio.com/settings/api-keys."
1497
+ );
769
1498
  }
770
- this._apiKey = options.apiKey;
1499
+ const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
1500
+ this._apiKey = cleanKey;
771
1501
  this._isMasterKey = options.isMasterKey || false;
772
1502
  this._isToken = options.isToken || false;
773
1503
  this._orgId = options.orgId;
774
- this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, "");
1504
+ if (options.apiUrl) {
1505
+ this._apiUrl = options.apiUrl.replace(/\/$/, "");
1506
+ } else {
1507
+ const effectiveRegion = options.region || detectedRegion || "eu";
1508
+ if (!(effectiveRegion in REGION_URLS)) {
1509
+ throw new ValidationError(
1510
+ `Invalid region '${effectiveRegion}'. Must be one of: ${Object.keys(REGION_URLS).join(", ")}.`
1511
+ );
1512
+ }
1513
+ this._apiUrl = REGION_URLS[effectiveRegion];
1514
+ }
775
1515
  this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
776
1516
  this._timeout = options.timeout || 6e4;
1517
+ this._keepalivePingInterval = options.keepalivePingInterval !== void 0 ? options.keepalivePingInterval : 2e4;
777
1518
  this.models = new ModelsResource(this);
778
1519
  this.voices = new VoicesResource(this);
779
1520
  this.tts = new TTSResource(this);
@@ -819,6 +1560,10 @@ var KugelAudio = class _KugelAudio {
819
1560
  get ttsUrl() {
820
1561
  return this._ttsUrl;
821
1562
  }
1563
+ /** Get keepalive ping interval in milliseconds, or null if disabled. */
1564
+ get keepalivePingInterval() {
1565
+ return this._keepalivePingInterval;
1566
+ }
822
1567
  /**
823
1568
  * Close the client and release resources.
824
1569
  * This closes any pooled WebSocket connections.
@@ -873,25 +1618,49 @@ var KugelAudio = class _KugelAudio {
873
1618
  signal: controller.signal
874
1619
  });
875
1620
  clearTimeout(timeoutId);
876
- if (response.status === 401) {
877
- throw new AuthenticationError("Invalid API key");
1621
+ if (!response.ok) {
1622
+ const text = await response.text();
1623
+ throw classifyHttpError(response.status, text, response.headers);
878
1624
  }
879
- if (response.status === 403) {
880
- throw new InsufficientCreditsError("Access denied");
1625
+ return await response.json();
1626
+ } catch (error) {
1627
+ clearTimeout(timeoutId);
1628
+ if (error instanceof KugelAudioError) {
1629
+ throw error;
881
1630
  }
882
- if (response.status === 429) {
883
- throw new RateLimitError("Rate limit exceeded");
1631
+ if (error.name === "AbortError") {
1632
+ throw new ConnectionError(
1633
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`
1634
+ );
884
1635
  }
1636
+ throw new ConnectionError(
1637
+ `Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
1638
+ );
1639
+ }
1640
+ }
1641
+ /**
1642
+ * Make a multipart/form-data request (for file uploads).
1643
+ * @internal Used by VoicesResource for reference file uploads.
1644
+ */
1645
+ async requestMultipart(method, path, formData) {
1646
+ const url = `${this._apiUrl}${path}`;
1647
+ const headers = {
1648
+ "X-API-Key": this._apiKey,
1649
+ "Authorization": `Bearer ${this._apiKey}`
1650
+ };
1651
+ const controller = new AbortController();
1652
+ const timeoutId = setTimeout(() => controller.abort(), this._timeout);
1653
+ try {
1654
+ const response = await fetch(url, {
1655
+ method,
1656
+ headers,
1657
+ body: formData,
1658
+ signal: controller.signal
1659
+ });
1660
+ clearTimeout(timeoutId);
885
1661
  if (!response.ok) {
886
1662
  const text = await response.text();
887
- let message = `HTTP ${response.status}`;
888
- try {
889
- const json = JSON.parse(text);
890
- message = json.detail || json.error || message;
891
- } catch {
892
- message = text || message;
893
- }
894
- throw new KugelAudioError(message, response.status);
1663
+ throw classifyHttpError(response.status, text, response.headers);
895
1664
  }
896
1665
  return await response.json();
897
1666
  } catch (error) {
@@ -900,21 +1669,31 @@ var KugelAudio = class _KugelAudio {
900
1669
  throw error;
901
1670
  }
902
1671
  if (error.name === "AbortError") {
903
- throw new KugelAudioError("Request timed out");
1672
+ throw new ConnectionError(
1673
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`
1674
+ );
904
1675
  }
905
- throw new KugelAudioError(`Request failed: ${error.message}`);
1676
+ throw new ConnectionError(
1677
+ `Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
1678
+ );
906
1679
  }
907
1680
  }
908
1681
  };
909
1682
  export {
910
1683
  AuthenticationError,
911
1684
  ConnectionError,
1685
+ ErrorCodes,
912
1686
  InsufficientCreditsError,
913
1687
  KugelAudio,
914
1688
  KugelAudioError,
915
1689
  RateLimitError,
916
1690
  ValidationError,
1691
+ WsCloseCodes,
917
1692
  base64ToArrayBuffer,
1693
+ classifyHttpError,
1694
+ classifyWsClose,
1695
+ classifyWsFrame,
1696
+ classifyWsHandshakeError,
918
1697
  createWavBlob,
919
1698
  createWavFile,
920
1699
  decodePCM16