kugelaudio 0.2.2 → 0.3.0

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in the public registry.
package/src/client.ts CHANGED
@@ -3,25 +3,52 @@
3
3
  */
4
4
 
5
5
  import {
6
- AuthenticationError,
7
- InsufficientCreditsError,
6
+ ConnectionError,
8
7
  KugelAudioError,
9
- RateLimitError,
8
+ ValidationError,
9
+ classifyHttpError,
10
+ classifyWsClose,
11
+ classifyWsFrame,
12
+ classifyWsHandshakeError,
10
13
  } from './errors';
11
14
  import type {
12
15
  AudioChunk,
13
16
  AudioResponse,
17
+ CreateVoiceOptions,
14
18
  GenerateOptions,
15
19
  GenerationStats,
16
20
  KugelAudioOptions,
17
21
  Model,
18
22
  StreamCallbacks,
19
- Voice
23
+ StreamConfig,
24
+ StreamingSessionCallbacks,
25
+ UpdateVoiceOptions,
26
+ VoiceDetail,
27
+ VoiceListResponse,
28
+ VoiceReference,
29
+ WordTimestamp
20
30
  } from './types';
21
31
  import { base64ToArrayBuffer } from './utils';
22
32
  import { getWebSocket } from './websocket';
23
33
 
24
- const DEFAULT_API_URL = 'https://api.kugelaudio.com';
34
+ import type { Region } from './types';
35
+
36
+ const REGION_URLS: Record<Region, string> = {
37
+ eu: 'https://api.kugelaudio.com',
38
+ us: 'https://us-api.kugelaudio.com',
39
+ global: 'https://global-api.kugelaudio.com',
40
+ };
41
+
42
+ const REGION_PREFIXES = ['eu-', 'us-', 'global-'] as const;
43
+
44
+ function parseApiKey(apiKey: string): { cleanKey: string; detectedRegion?: Region } {
45
+ for (const prefix of REGION_PREFIXES) {
46
+ if (apiKey.startsWith(prefix)) {
47
+ return { cleanKey: apiKey.slice(prefix.length), detectedRegion: prefix.slice(0, -1) as Region };
48
+ }
49
+ }
50
+ return { cleanKey: apiKey };
51
+ }
25
52
 
26
53
  /**
27
54
  * Create a new WebSocket instance.
@@ -36,6 +63,23 @@ function createWs(url: string): WebSocket {
36
63
  /** WebSocket OPEN readyState constant. */
37
64
  const WS_OPEN = 1;
38
65
 
66
+ let _languageWarningLogged = false;
67
+
68
+ function warnIfNoLanguage(
69
+ language: string | undefined,
70
+ normalize: boolean | undefined
71
+ ): void {
72
+ const normEnabled = normalize === undefined || normalize;
73
+ if (!language && normEnabled && !_languageWarningLogged) {
74
+ _languageWarningLogged = true;
75
+ console.warn(
76
+ "[KugelAudio] No 'language' set with normalization enabled — the server " +
77
+ 'will auto-detect the language, adding ~60-150ms to TTFA. Set language ' +
78
+ "(e.g., language: 'en') for optimal latency."
79
+ );
80
+ }
81
+ }
82
+
39
83
  /**
40
84
  * Models resource for listing TTS models.
41
85
  */
@@ -71,52 +115,211 @@ class VoicesResource {
71
115
  language?: string;
72
116
  includePublic?: boolean;
73
117
  limit?: number;
74
- }): Promise<Voice[]> {
118
+ offset?: number;
119
+ }): Promise<VoiceListResponse> {
75
120
  const params = new URLSearchParams();
76
121
  if (options?.language) params.set('language', options.language);
77
122
  if (options?.includePublic !== undefined) {
78
123
  params.set('include_public', String(options.includePublic));
79
124
  }
80
125
  if (options?.limit) params.set('limit', String(options.limit));
126
+ if (options?.offset) params.set('offset', String(options.offset));
81
127
 
82
128
  const query = params.toString();
83
129
  const path = query ? `/v1/voices?${query}` : '/v1/voices';
84
- const response = await this.client.request<{ voices: any[] }>('GET', path);
130
+ const response = await this.client.request<{ voices: any[]; total: number; limit: number; offset: number }>('GET', path);
85
131
 
86
- return response.voices.map((v) => ({
87
- id: v.id,
88
- name: v.name,
89
- description: v.description,
90
- category: v.category,
91
- sex: v.sex,
92
- age: v.age,
93
- supportedLanguages: v.supported_languages || [],
94
- sampleText: v.sample_text,
95
- avatarUrl: v.avatar_url,
96
- sampleUrl: v.sample_url,
97
- isPublic: v.is_public || false,
98
- verified: v.verified || false,
99
- }));
132
+ return {
133
+ voices: response.voices.map((v) => ({
134
+ id: v.id,
135
+ name: v.name,
136
+ description: v.description,
137
+ category: v.category,
138
+ sex: v.sex,
139
+ age: v.age,
140
+ supportedLanguages: v.supported_languages || [],
141
+ sampleText: v.sample_text,
142
+ avatarUrl: v.avatar_url,
143
+ sampleUrl: v.sample_url,
144
+ isPublic: v.is_public || false,
145
+ verified: v.verified || false,
146
+ })),
147
+ total: response.total,
148
+ limit: response.limit,
149
+ offset: response.offset,
150
+ };
100
151
  }
101
152
 
102
153
  /**
103
154
  * Get a specific voice by ID.
104
155
  */
105
- async get(voiceId: number): Promise<Voice> {
156
+ async get(voiceId: number): Promise<VoiceDetail> {
106
157
  const v = await this.client.request<any>('GET', `/v1/voices/${voiceId}`);
158
+ return this.mapVoiceDetail(v);
159
+ }
160
+
161
+ /**
162
+ * Create a new voice.
163
+ */
164
+ async create(options: CreateVoiceOptions): Promise<VoiceDetail> {
165
+ const metadata = {
166
+ name: options.name,
167
+ sex: options.sex,
168
+ description: options.description ?? '',
169
+ category: options.category ?? 'conversational',
170
+ age: options.age ?? 'middle_age',
171
+ quality: options.quality ?? 'mid',
172
+ supported_languages: options.supportedLanguages ?? ['en'],
173
+ is_public: options.isPublic ?? false,
174
+ sample_text: options.sampleText ?? '',
175
+ };
176
+
177
+ const formData = new FormData();
178
+ formData.append(
179
+ 'metadata',
180
+ new Blob([JSON.stringify(metadata)], { type: 'application/json' }),
181
+ );
182
+
183
+ if (options.referenceFiles) {
184
+ for (const file of options.referenceFiles) {
185
+ formData.append('files', file);
186
+ }
187
+ }
188
+
189
+ const v = await this.client.requestMultipart<any>('POST', '/v1/voices', formData);
190
+ return this.mapVoiceDetail(v);
191
+ }
192
+
193
+ /**
194
+ * Update an existing voice. Only provided fields are updated.
195
+ */
196
+ async update(voiceId: number, options: UpdateVoiceOptions): Promise<VoiceDetail> {
197
+ const payload: Record<string, unknown> = {};
198
+ if (options.name !== undefined) payload.name = options.name;
199
+ if (options.description !== undefined) payload.description = options.description;
200
+ if (options.category !== undefined) payload.category = options.category;
201
+ if (options.age !== undefined) payload.age = options.age;
202
+ if (options.sex !== undefined) payload.sex = options.sex;
203
+ if (options.quality !== undefined) payload.quality = options.quality;
204
+ if (options.supportedLanguages !== undefined) payload.supported_languages = options.supportedLanguages;
205
+ if (options.isPublic !== undefined) payload.is_public = options.isPublic;
206
+ if (options.sampleText !== undefined) payload.sample_text = options.sampleText;
207
+
208
+ const v = await this.client.request<any>('PATCH', `/v1/voices/${voiceId}`, payload);
209
+ return this.mapVoiceDetail(v);
210
+ }
211
+
212
+ /**
213
+ * Delete a voice.
214
+ */
215
+ async delete(voiceId: number): Promise<void> {
216
+ await this.client.request<any>('DELETE', `/v1/voices/${voiceId}`);
217
+ }
218
+
219
+ // -- Reference management --
220
+
221
+ /**
222
+ * List reference audio files for a voice.
223
+ */
224
+ async listReferences(voiceId: number): Promise<VoiceReference[]> {
225
+ const response = await this.client.request<{ references: any[] }>(
226
+ 'GET',
227
+ `/v1/voices/${voiceId}/references`,
228
+ );
229
+ return response.references.map((r) => this.mapVoiceReference(r));
230
+ }
231
+
232
+ /**
233
+ * Upload a reference audio file to a voice.
234
+ *
235
+ * @param voiceId - Voice ID
236
+ * @param file - Audio file (File in browser, Blob in Node.js)
237
+ * @param referenceText - Optional transcript of the reference audio
238
+ */
239
+ async addReference(
240
+ voiceId: number,
241
+ file: File | Blob,
242
+ referenceText?: string,
243
+ ): Promise<VoiceReference> {
244
+ const formData = new FormData();
245
+ formData.append('file', file);
246
+ if (referenceText) {
247
+ formData.append('reference_text', referenceText);
248
+ }
249
+
250
+ const r = await this.client.requestMultipart<any>(
251
+ 'POST',
252
+ `/v1/voices/${voiceId}/references`,
253
+ formData,
254
+ );
255
+ return this.mapVoiceReference(r);
256
+ }
257
+
258
+ /**
259
+ * Delete a reference audio file from a voice.
260
+ */
261
+ async deleteReference(voiceId: number, referenceId: number): Promise<void> {
262
+ await this.client.request<any>(
263
+ 'DELETE',
264
+ `/v1/voices/${voiceId}/references/${referenceId}`,
265
+ );
266
+ }
267
+
268
+ // -- Publishing --
269
+
270
+ /**
271
+ * Request publication of a voice. Sets it as public and marks it
272
+ * as pending verification by an admin.
273
+ */
274
+ async publish(voiceId: number): Promise<VoiceDetail> {
275
+ const v = await this.client.request<any>('POST', `/v1/voices/${voiceId}/publish`);
276
+ return this.mapVoiceDetail(v);
277
+ }
278
+
279
+ // -- Sample generation --
280
+
281
+ /**
282
+ * Trigger sample audio generation for a voice.
283
+ */
284
+ async generateSample(voiceId: number): Promise<VoiceDetail> {
285
+ const v = await this.client.request<any>(
286
+ 'POST',
287
+ `/v1/voices/${voiceId}/generate-sample`,
288
+ );
289
+ return this.mapVoiceDetail(v);
290
+ }
291
+
292
+ // -- Helpers --
293
+
294
+ private mapVoiceDetail(v: any): VoiceDetail {
107
295
  return {
108
296
  id: v.id,
109
297
  name: v.name,
110
- description: v.description,
111
- category: v.category,
112
- sex: v.sex,
298
+ description: v.description ?? '',
299
+ generativeVoiceDescription: v.generative_voice_description ?? '',
300
+ supportedLanguages: v.supported_languages ?? [],
301
+ category: v.category ?? 'cloned',
113
302
  age: v.age,
114
- supportedLanguages: v.supported_languages || [],
115
- sampleText: v.sample_text,
116
- avatarUrl: v.avatar_url,
303
+ sex: v.sex,
304
+ quality: v.quality ?? 'mid',
305
+ isPublic: v.is_public ?? false,
306
+ verified: v.verified ?? false,
307
+ pendingVerification: v.pending_verification ?? false,
117
308
  sampleUrl: v.sample_url,
118
- isPublic: v.is_public || false,
119
- verified: v.verified || false,
309
+ avatarUrl: v.avatar_url,
310
+ sampleText: v.sample_text ?? '',
311
+ };
312
+ }
313
+
314
+ private mapVoiceReference(r: any): VoiceReference {
315
+ return {
316
+ id: r.id,
317
+ voiceId: r.voice_id,
318
+ name: r.name ?? '',
319
+ referenceText: r.reference_text ?? '',
320
+ s3Path: r.s3_path ?? '',
321
+ audioUrl: r.audio_url,
322
+ isGenerated: r.is_generated ?? false,
120
323
  };
121
324
  }
122
325
  }
@@ -134,6 +337,7 @@ class TTSResource {
134
337
  reject: (error: Error) => void;
135
338
  }> = new Map();
136
339
  private requestCounter = 0;
340
+ private keepaliveTimer: ReturnType<typeof setInterval> | null = null;
137
341
 
138
342
  constructor(private client: KugelAudio) {}
139
343
 
@@ -172,11 +376,15 @@ class TTSResource {
172
376
  async generate(options: GenerateOptions): Promise<AudioResponse> {
173
377
  const chunks: ArrayBuffer[] = [];
174
378
  let finalStats: GenerationStats | undefined;
379
+ const allTimestamps: WordTimestamp[] = [];
175
380
 
176
381
  await this.stream(options, {
177
382
  onChunk: (chunk) => {
178
383
  chunks.push(base64ToArrayBuffer(chunk.audio));
179
384
  },
385
+ onWordTimestamps: (timestamps) => {
386
+ allTimestamps.push(...timestamps);
387
+ },
180
388
  onFinal: (stats) => {
181
389
  finalStats = stats;
182
390
  },
@@ -198,9 +406,71 @@ class TTSResource {
198
406
  durationMs: finalStats ? finalStats.durationMs : 0,
199
407
  generationMs: finalStats ? finalStats.generationMs : 0,
200
408
  rtf: finalStats ? finalStats.rtf : 0,
409
+ wordTimestamps: allTimestamps,
201
410
  };
202
411
  }
203
412
 
413
+ /**
414
+ * Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
415
+ *
416
+ * **Node.js only** — this method requires the `stream` built-in module and is
417
+ * intended for server-side integrations such as Vapi custom TTS endpoints,
418
+ * Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
419
+ *
420
+ * Compared to manually wiring `onChunk` to a `Readable`, this method avoids
421
+ * a common race-condition: the stream object is created and returned **before**
422
+ * any chunks arrive, so the caller can safely pipe or attach listeners before
423
+ * the first audio byte is pushed.
424
+ *
425
+ * @example Vapi custom TTS endpoint
426
+ * ```typescript
427
+ * app.post('/synthesize', (req, res) => {
428
+ * res.setHeader('Content-Type', 'audio/pcm');
429
+ * res.setHeader('Transfer-Encoding', 'chunked');
430
+ *
431
+ * const readable = client.tts.toReadable({
432
+ * text: req.body.message.text,
433
+ * modelId: 'kugel-1-turbo',
434
+ * sampleRate: req.body.message.sampleRate,
435
+ * language: 'en',
436
+ * });
437
+ *
438
+ * readable.pipe(res);
439
+ * });
440
+ * ```
441
+ *
442
+ * @param options - TTS generation options (same as `stream()`)
443
+ * @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
444
+ * @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
445
+ */
446
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
447
+ toReadable(options: GenerateOptions, reuseConnection = true): any {
448
+ // Dynamic require keeps browser bundles free of Node.js built-ins.
449
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
450
+ const { Readable } = require('stream') as typeof import('stream');
451
+ const readable = new Readable({ read() {} });
452
+
453
+ this.stream(
454
+ options,
455
+ {
456
+ onChunk: (chunk: AudioChunk) => {
457
+ readable.push(Buffer.from(chunk.audio, 'base64'));
458
+ },
459
+ onFinal: () => {
460
+ readable.push(null);
461
+ },
462
+ onError: (error: Error) => {
463
+ readable.destroy(error);
464
+ },
465
+ },
466
+ reuseConnection
467
+ ).catch((error: Error) => {
468
+ readable.destroy(error);
469
+ });
470
+
471
+ return readable;
472
+ }
473
+
204
474
  /**
205
475
  * Build the WebSocket URL with appropriate auth param.
206
476
  */
@@ -259,11 +529,20 @@ class TTSResource {
259
529
  this.wsConnection = ws;
260
530
  this.wsUrl = url;
261
531
  this.setupMessageHandler(ws);
532
+ this.startKeepalive(ws);
262
533
  resolve(ws);
263
534
  };
264
535
 
265
- ws.onerror = () => {
266
- reject(new KugelAudioError('WebSocket connection error'));
536
+ ws.onerror = (event: unknown) => {
537
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
538
+ const typed = classifyWsHandshakeError(underlying);
539
+ reject(
540
+ typed ??
541
+ new ConnectionError(
542
+ `Could not establish KugelAudio WebSocket connection to ${url}. ` +
543
+ 'Check network connectivity.',
544
+ ),
545
+ );
267
546
  };
268
547
  });
269
548
  }
@@ -287,7 +566,7 @@ class TTSResource {
287
566
  if (!pending) return;
288
567
 
289
568
  if (data.error) {
290
- const error = this.parseError(data.error);
569
+ const error = this.parseError(data);
291
570
  pending.callbacks.onError?.(error);
292
571
  this.pendingRequests.delete(requestId);
293
572
  pending.reject(error);
@@ -301,7 +580,6 @@ class TTSResource {
301
580
  totalSamples: data.total_samples,
302
581
  durationMs: data.dur_ms,
303
582
  generationMs: data.gen_ms,
304
- ttfaMs: data.ttfa_ms,
305
583
  rtf: data.rtf,
306
584
  error: data.error,
307
585
  };
@@ -321,23 +599,45 @@ class TTSResource {
321
599
  };
322
600
  pending.callbacks.onChunk?.(chunk);
323
601
  }
602
+
603
+ if (data.word_timestamps) {
604
+ const timestamps: WordTimestamp[] = data.word_timestamps.map(
605
+ (w: Record<string, unknown>) => ({
606
+ word: w.word as string,
607
+ startMs: w.start_ms as number,
608
+ endMs: w.end_ms as number,
609
+ charStart: w.char_start as number,
610
+ charEnd: w.char_end as number,
611
+ score: (w.score as number) ?? 1.0,
612
+ })
613
+ );
614
+ pending.callbacks.onWordTimestamps?.(timestamps);
615
+ }
324
616
  } catch (e) {
325
617
  console.error('Failed to parse WebSocket message:', e);
326
618
  }
327
619
  };
328
620
 
329
621
  ws.onclose = (event) => {
330
- // Clear connection pool
622
+ // Clear connection pool and keepalive
623
+ this.stopKeepalive();
331
624
  this.wsConnection = null;
332
625
  this.wsUrl = null;
333
626
 
334
- // Reject all pending requests
627
+ // Reject all pending requests with appropriate error types
335
628
  for (const [id, pending] of this.pendingRequests) {
336
629
  pending.callbacks.onClose?.();
337
- if (event.code === 4001) {
338
- pending.reject(new AuthenticationError('Authentication failed'));
339
- } else if (event.code === 4003) {
340
- pending.reject(new InsufficientCreditsError('Insufficient credits'));
630
+ // Only surface server-initiated error close codes; normal closes
631
+ // (1000, 1001) should not reject pending requests with an error.
632
+ if (
633
+ event.code === 4001 ||
634
+ event.code === 4003 ||
635
+ event.code === 4029 ||
636
+ event.code === 4500
637
+ ) {
638
+ const error = classifyWsClose(event.code, event.reason);
639
+ pending.callbacks.onError?.(error);
640
+ pending.reject(error);
341
641
  }
342
642
  this.pendingRequests.delete(id);
343
643
  }
@@ -345,7 +645,9 @@ class TTSResource {
345
645
 
346
646
  ws.onerror = () => {
347
647
  // Reject all pending requests
348
- const error = new KugelAudioError('WebSocket connection error');
648
+ const error = new ConnectionError(
649
+ 'KugelAudio WebSocket connection error. Check network connectivity.',
650
+ );
349
651
  for (const [id, pending] of this.pendingRequests) {
350
652
  pending.callbacks.onError?.(error);
351
653
  pending.reject(error);
@@ -380,6 +682,7 @@ class TTSResource {
380
682
  options: GenerateOptions,
381
683
  callbacks: StreamCallbacks
382
684
  ): Promise<void> {
685
+ warnIfNoLanguage(options.language, options.normalize);
383
686
  const ws = await this.getConnection();
384
687
  const requestId = ++this.requestCounter;
385
688
 
@@ -393,10 +696,14 @@ class TTSResource {
393
696
  model_id: options.modelId || 'kugel-1-turbo',
394
697
  voice_id: options.voiceId,
395
698
  cfg_scale: options.cfgScale ?? 2.0,
699
+ ...(options.temperature !== undefined && { temperature: options.temperature }),
396
700
  max_new_tokens: options.maxNewTokens ?? 2048,
397
701
  sample_rate: options.sampleRate ?? 24000,
398
702
  normalize: options.normalize ?? true,
399
703
  ...(options.language && { language: options.language }),
704
+ ...(options.wordTimestamps && { word_timestamps: true }),
705
+ ...(options.speed !== undefined && { speed: options.speed }),
706
+ ...(options.projectId !== undefined && { project_id: options.projectId }),
400
707
  }));
401
708
  });
402
709
  }
@@ -408,6 +715,7 @@ class TTSResource {
408
715
  options: GenerateOptions,
409
716
  callbacks: StreamCallbacks
410
717
  ): Promise<void> {
718
+ warnIfNoLanguage(options.language, options.normalize);
411
719
  return new Promise((resolve, reject) => {
412
720
  const url = this.buildWsUrl();
413
721
  const ws = createWs(url);
@@ -424,6 +732,9 @@ class TTSResource {
424
732
  sample_rate: options.sampleRate ?? 24000,
425
733
  normalize: options.normalize ?? true,
426
734
  ...(options.language && { language: options.language }),
735
+ ...(options.wordTimestamps && { word_timestamps: true }),
736
+ ...(options.speed !== undefined && { speed: options.speed }),
737
+ ...(options.projectId !== undefined && { project_id: options.projectId }),
427
738
  }));
428
739
  };
429
740
 
@@ -438,7 +749,7 @@ class TTSResource {
438
749
  const data = JSON.parse(messageData);
439
750
 
440
751
  if (data.error) {
441
- const error = this.parseError(data.error);
752
+ const error = this.parseError(data);
442
753
  callbacks.onError?.(error);
443
754
  ws.close();
444
755
  reject(error);
@@ -452,7 +763,6 @@ class TTSResource {
452
763
  totalSamples: data.total_samples,
453
764
  durationMs: data.dur_ms,
454
765
  generationMs: data.gen_ms,
455
- ttfaMs: data.ttfa_ms,
456
766
  rtf: data.rtf,
457
767
  error: data.error,
458
768
  };
@@ -472,32 +782,87 @@ class TTSResource {
472
782
  };
473
783
  callbacks.onChunk?.(chunk);
474
784
  }
785
+
786
+ if (data.word_timestamps) {
787
+ const timestamps: WordTimestamp[] = data.word_timestamps.map(
788
+ (w: Record<string, unknown>) => ({
789
+ word: w.word as string,
790
+ startMs: w.start_ms as number,
791
+ endMs: w.end_ms as number,
792
+ charStart: w.char_start as number,
793
+ charEnd: w.char_end as number,
794
+ score: (w.score as number) ?? 1.0,
795
+ })
796
+ );
797
+ callbacks.onWordTimestamps?.(timestamps);
798
+ }
475
799
  } catch (e) {
476
800
  console.error('Failed to parse WebSocket message:', e);
477
801
  }
478
802
  };
479
803
 
480
- ws.onerror = () => {
481
- const error = new KugelAudioError('WebSocket connection error');
804
+ ws.onerror = (event: unknown) => {
805
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
806
+ const error =
807
+ classifyWsHandshakeError(underlying) ??
808
+ new ConnectionError(
809
+ 'KugelAudio WebSocket connection error. Check network connectivity.',
810
+ );
482
811
  callbacks.onError?.(error);
483
812
  reject(error);
484
813
  };
485
814
 
486
815
  ws.onclose = (event) => {
487
816
  callbacks.onClose?.();
488
- if (event.code === 4001) {
489
- reject(new AuthenticationError('Authentication failed'));
490
- } else if (event.code === 4003) {
491
- reject(new InsufficientCreditsError('Insufficient credits'));
817
+ if (
818
+ event.code === 4001 ||
819
+ event.code === 4003 ||
820
+ event.code === 4029 ||
821
+ event.code === 4500
822
+ ) {
823
+ const error = classifyWsClose(event.code, event.reason);
824
+ callbacks.onError?.(error);
825
+ reject(error);
492
826
  }
493
827
  };
494
828
  });
495
829
  }
496
830
 
831
+ /**
832
+ * Start periodic keepalive pings on the pooled connection.
833
+ * Uses the ws package's ping() in Node.js; silently skips in browsers
834
+ * where WebSocket doesn't expose a ping method.
835
+ */
836
+ private startKeepalive(ws: WebSocket): void {
837
+ this.stopKeepalive();
838
+ const intervalMs = this.client.keepalivePingInterval;
839
+ if (intervalMs == null || intervalMs <= 0) return;
840
+
841
+ this.keepaliveTimer = setInterval(() => {
842
+ if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
843
+ this.stopKeepalive();
844
+ return;
845
+ }
846
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
847
+ if (typeof (ws as any).ping === 'function') {
848
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
849
+ (ws as any).ping();
850
+ }
851
+ }, intervalMs);
852
+ }
853
+
854
+ private stopKeepalive(): void {
855
+ if (this.keepaliveTimer !== null) {
856
+ clearInterval(this.keepaliveTimer);
857
+ this.keepaliveTimer = null;
858
+ }
859
+ }
860
+
497
861
  /**
498
862
  * Close the pooled WebSocket connection.
499
863
  */
500
864
  close(): void {
865
+ this.stopKeepalive();
501
866
  if (this.wsConnection) {
502
867
  try {
503
868
  this.wsConnection.close();
@@ -509,15 +874,43 @@ class TTSResource {
509
874
  }
510
875
  }
511
876
 
512
- private parseError(message: string): Error {
513
- const lower = message.toLowerCase();
514
- if (lower.includes('auth') || lower.includes('unauthorized')) {
515
- return new AuthenticationError(message);
516
- }
517
- if (lower.includes('credit')) {
518
- return new InsufficientCreditsError(message);
519
- }
520
- return new KugelAudioError(message);
877
+ private parseError(data: { error?: string; error_code?: string; retry_after?: number }): Error {
878
+ return classifyWsFrame(data);
879
+ }
880
+
881
+ /**
882
+ * Create a streaming session for LLM integration.
883
+ *
884
+ * The session connects to `/ws/tts/stream` and keeps a persistent
885
+ * connection across multiple {@link StreamingSession.send} calls.
886
+ * The server auto-chunks text at sentence boundaries — no client-side
887
+ * flushing required.
888
+ *
889
+ * @param config - Session configuration (voice, model, chunking strategy).
890
+ * @param callbacks - Callbacks for audio chunks and session lifecycle events.
891
+ * @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
892
+ *
893
+ * @example
894
+ * ```typescript
895
+ * const session = client.tts.streamingSession(
896
+ * { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
897
+ * { onChunk: (chunk) => playAudio(chunk.audio) },
898
+ * );
899
+ *
900
+ * session.connect();
901
+ *
902
+ * for await (const token of llmStream) {
903
+ * session.send(token);
904
+ * }
905
+ *
906
+ * await session.close();
907
+ * ```
908
+ */
909
+ streamingSession(
910
+ config: StreamConfig,
911
+ callbacks: StreamingSessionCallbacks
912
+ ): StreamingSession {
913
+ return new StreamingSession(this.client, config, callbacks);
521
914
  }
522
915
 
523
916
  /**
@@ -538,7 +931,7 @@ class TTSResource {
538
931
  * console.log(`Audio from ${chunk.contextId}`);
539
932
  * playAudio(chunk.audio);
540
933
  * },
541
- * onContextFinal: (contextId) => {
934
+ * onContextClosed: (contextId) => {
542
935
  * console.log(`${contextId} finished`);
543
936
  * },
544
937
  * });
@@ -589,8 +982,13 @@ class MultiContextSession {
589
982
 
590
983
  /**
591
984
  * Connect to the multi-context WebSocket endpoint.
985
+ *
986
+ * The returned promise resolves once the WebSocket is OPEN so callers can
987
+ * ``await session.connect(callbacks)`` before invoking
988
+ * {@link createContext} / {@link send}. Pre-open errors reject with the
989
+ * typed error.
592
990
  */
593
- connect(callbacks: import('./types').MultiContextCallbacks): void {
991
+ connect(callbacks: import('./types').MultiContextCallbacks): Promise<void> {
594
992
  this.callbacks = callbacks;
595
993
 
596
994
  const wsUrl = this.client.ttsUrl
@@ -608,12 +1006,9 @@ class MultiContextSession {
608
1006
 
609
1007
  const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
610
1008
  this.ws = createWs(url);
1009
+ const ws = this.ws;
611
1010
 
612
- this.ws.onopen = () => {
613
- // Connection established, ready to create contexts
614
- };
615
-
616
- this.ws.onmessage = (event: { data: unknown }) => {
1011
+ ws.onmessage = (event: { data: unknown }) => {
617
1012
  try {
618
1013
  // Handle both browser (string) and Node.js (Buffer) message formats
619
1014
  const messageData = typeof event.data === 'string'
@@ -654,10 +1049,6 @@ class MultiContextSession {
654
1049
  this.callbacks.onChunk?.(chunk);
655
1050
  }
656
1051
 
657
- if (data.is_final) {
658
- this.callbacks.onContextFinal?.(data.context_id);
659
- }
660
-
661
1052
  if (data.context_closed) {
662
1053
  this.contexts.delete(data.context_id);
663
1054
  this.callbacks.onContextClosed?.(data.context_id);
@@ -676,20 +1067,51 @@ class MultiContextSession {
676
1067
  }
677
1068
  };
678
1069
 
679
- this.ws.onerror = () => {
680
- this.callbacks.onError?.(new KugelAudioError('WebSocket connection error'));
681
- };
1070
+ return new Promise<void>((resolve, reject) => {
1071
+ let opened = false;
682
1072
 
683
- this.ws.onclose = (event) => {
684
- if (event.code === 4001) {
685
- this.callbacks.onError?.(new AuthenticationError('Authentication failed'));
686
- } else if (event.code === 4003) {
687
- this.callbacks.onError?.(new InsufficientCreditsError('Insufficient credits'));
688
- }
689
- this.ws = null;
690
- this.isStarted = false;
691
- this.contexts.clear();
692
- };
1073
+ ws.onopen = () => {
1074
+ opened = true;
1075
+ resolve();
1076
+ };
1077
+
1078
+ ws.onerror = (event: unknown) => {
1079
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
1080
+ const err =
1081
+ classifyWsHandshakeError(underlying) ??
1082
+ new ConnectionError(
1083
+ 'KugelAudio multi-context WebSocket connection error. ' +
1084
+ 'Check network connectivity.',
1085
+ );
1086
+ if (!opened) reject(err);
1087
+ this.callbacks.onError?.(err);
1088
+ };
1089
+
1090
+ ws.onclose = (event) => {
1091
+ let typedErr: KugelAudioError | null = null;
1092
+ if (
1093
+ event.code === 4001 ||
1094
+ event.code === 4003 ||
1095
+ event.code === 4029 ||
1096
+ event.code === 4500
1097
+ ) {
1098
+ typedErr = classifyWsClose(event.code, event.reason);
1099
+ this.callbacks.onError?.(typedErr);
1100
+ }
1101
+ if (!opened) {
1102
+ reject(
1103
+ typedErr ??
1104
+ new ConnectionError(
1105
+ `KugelAudio multi-context WebSocket closed before ready ` +
1106
+ `(code ${event.code}).`,
1107
+ ),
1108
+ );
1109
+ }
1110
+ this.ws = null;
1111
+ this.isStarted = false;
1112
+ this.contexts.clear();
1113
+ };
1114
+ });
693
1115
  }
694
1116
 
695
1117
  /**
@@ -713,10 +1135,13 @@ class MultiContextSession {
713
1135
 
714
1136
  // Include session config on first context
715
1137
  if (!this.isStarted) {
1138
+ warnIfNoLanguage(this.config.language, this.config.normalize);
716
1139
  if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
717
1140
  if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
1141
+ if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
718
1142
  if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
719
1143
  if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
1144
+ if (this.config.language) msg.language = this.config.language;
720
1145
  if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
721
1146
  }
722
1147
 
@@ -821,6 +1246,336 @@ class MultiContextSession {
821
1246
  }
822
1247
  }
823
1248
 
1249
+ /**
1250
+ * Streaming session for LLM integration via `/ws/tts/stream`.
1251
+ *
1252
+ * The server accumulates text across multiple {@link send} calls and
1253
+ * auto-chunks it at sentence boundaries, keeping the KV cache warm between
1254
+ * chunks for natural prosody. You never need to call `flush` explicitly —
1255
+ * configure {@link StreamConfig.chunkLengthSchedule} or
1256
+ * {@link StreamConfig.autoMode} instead.
1257
+ *
1258
+ * @example
1259
+ * ```typescript
1260
+ * const session = client.tts.streamingSession({
1261
+ * voiceId: 123,
1262
+ * autoMode: true,
1263
+ * chunkLengthSchedule: [50, 100, 150, 250],
1264
+ * }, {
1265
+ * onChunk: (chunk) => playAudio(chunk.audio),
1266
+ * onSessionClosed: (totalSecs) => console.log(`Done: ${totalSecs}s`),
1267
+ * });
1268
+ *
1269
+ * session.connect();
1270
+ *
1271
+ * for await (const token of llmStream) {
1272
+ * session.send(token);
1273
+ * }
1274
+ *
1275
+ * await session.close();
1276
+ * ```
1277
+ */
1278
+ class StreamingSession {
1279
+ private ws: WebSocket | null = null;
1280
+ private config: StreamConfig;
1281
+ private callbacks: StreamingSessionCallbacks;
1282
+ private client: KugelAudio;
1283
+ private configSent = false;
1284
+
1285
+ constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks) {
1286
+ this.client = client;
1287
+ this.config = config;
1288
+ this.callbacks = callbacks;
1289
+ }
1290
+
1291
+ /**
1292
+ * Open the WebSocket connection and authenticate.
1293
+ *
1294
+ * The returned promise resolves once the WebSocket is OPEN, so callers can
1295
+ * ``await session.connect()`` and then ``send()`` without racing the
1296
+ * handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
1297
+ * the promise with the typed error.
1298
+ */
1299
+ connect(): Promise<void> {
1300
+ const wsUrl = this.client.ttsUrl
1301
+ .replace('https://', 'wss://')
1302
+ .replace('http://', 'ws://');
1303
+
1304
+ let authParam: string;
1305
+ if (this.client.isToken) {
1306
+ authParam = 'token';
1307
+ } else if (this.client.isMasterKey) {
1308
+ authParam = 'master_key';
1309
+ } else {
1310
+ authParam = 'api_key';
1311
+ }
1312
+
1313
+ const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
1314
+ this.ws = createWs(url);
1315
+ const ws = this.ws;
1316
+
1317
+ ws.onmessage = (event: { data: unknown }) => {
1318
+ try {
1319
+ const messageData = typeof event.data === 'string'
1320
+ ? event.data
1321
+ : event.data instanceof Buffer
1322
+ ? event.data.toString()
1323
+ : String(event.data);
1324
+ const data = JSON.parse(messageData);
1325
+
1326
+ if (data.error) {
1327
+ this.callbacks.onError?.(new KugelAudioError(data.error));
1328
+ return;
1329
+ }
1330
+
1331
+ if (data.audio) {
1332
+ const chunk: AudioChunk = {
1333
+ audio: data.audio,
1334
+ encoding: data.enc || 'pcm_s16le',
1335
+ index: data.idx,
1336
+ sampleRate: data.sr,
1337
+ samples: data.samples,
1338
+ };
1339
+ this.callbacks.onChunk?.(chunk);
1340
+ }
1341
+
1342
+ if (data.word_timestamps) {
1343
+ const timestamps = data.word_timestamps.map((w: Record<string, unknown>) => ({
1344
+ word: w.word as string,
1345
+ startMs: w.start_ms as number,
1346
+ endMs: w.end_ms as number,
1347
+ charStart: w.char_start as number,
1348
+ charEnd: w.char_end as number,
1349
+ score: (w.score as number) ?? 1.0,
1350
+ }));
1351
+ this.callbacks.onWordTimestamps?.(timestamps);
1352
+ }
1353
+
1354
+ if (data.chunk_complete) {
1355
+ this.callbacks.onChunkComplete?.(
1356
+ data.chunk_id ?? 0,
1357
+ data.audio_seconds ?? 0,
1358
+ data.gen_ms ?? 0,
1359
+ );
1360
+ }
1361
+
1362
+ if (data.generation_started) {
1363
+ this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
1364
+ }
1365
+
1366
+ if (data.session_closed) {
1367
+ this.callbacks.onSessionClosed?.(
1368
+ data.total_audio_seconds ?? 0,
1369
+ data.total_text_chunks ?? 0,
1370
+ data.total_audio_chunks ?? 0,
1371
+ );
1372
+ }
1373
+ } catch (e) {
1374
+ console.error('[KugelAudio] Failed to parse streaming session message:', e);
1375
+ }
1376
+ };
1377
+
1378
+ return new Promise<void>((resolve, reject) => {
1379
+ let opened = false;
1380
+
1381
+ ws.onopen = () => {
1382
+ opened = true;
1383
+ resolve();
1384
+ };
1385
+
1386
+ ws.onerror = (event: unknown) => {
1387
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
1388
+ const err =
1389
+ classifyWsHandshakeError(underlying) ??
1390
+ new ConnectionError(
1391
+ 'KugelAudio streaming WebSocket connection error. ' +
1392
+ 'Check network connectivity.',
1393
+ );
1394
+ if (!opened) reject(err);
1395
+ this.callbacks.onError?.(err);
1396
+ };
1397
+
1398
+ ws.onclose = (event) => {
1399
+ let typedErr: KugelAudioError | null = null;
1400
+ if (
1401
+ event.code === 4001 ||
1402
+ event.code === 4003 ||
1403
+ event.code === 4029 ||
1404
+ event.code === 4500
1405
+ ) {
1406
+ typedErr = classifyWsClose(event.code, event.reason);
1407
+ this.callbacks.onError?.(typedErr);
1408
+ }
1409
+ if (!opened) {
1410
+ reject(
1411
+ typedErr ??
1412
+ new ConnectionError(
1413
+ `KugelAudio streaming WebSocket closed before ready ` +
1414
+ `(code ${event.code}).`,
1415
+ ),
1416
+ );
1417
+ }
1418
+ this.ws = null;
1419
+ this.configSent = false;
1420
+ };
1421
+ });
1422
+ }
1423
+
1424
+ /**
1425
+ * Send a text chunk to the server (e.g. one LLM output token).
1426
+ *
1427
+ * The server buffers text across multiple calls and starts generating at
1428
+ * natural sentence boundaries automatically — no need to call `flush`.
1429
+ *
1430
+ * @param text - Raw text or LLM token to append to the server buffer.
1431
+ * @param flush - Force immediate generation of whatever is buffered.
1432
+ * **Avoid calling this per-sentence from the client.** Doing so bypasses
1433
+ * the server's semantic chunking, incurs a fresh model prefill cost on
1434
+ * every flush, and makes latency *worse*, not better. Let the server
1435
+ * handle chunking via `chunkLengthSchedule` / `autoMode` instead.
1436
+ */
1437
+ send(text: string, flush = false): void {
1438
+ if (!this.ws || this.ws.readyState !== WS_OPEN) {
1439
+ throw new KugelAudioError('StreamingSession not connected. Call connect() first.');
1440
+ }
1441
+
1442
+ const msg: Record<string, unknown> = { text, flush };
1443
+
1444
+ if (!this.configSent) {
1445
+ if (this.config.voiceId !== undefined) msg.voice_id = this.config.voiceId;
1446
+ if (this.config.modelId !== undefined) msg.model_id = this.config.modelId;
1447
+ if (this.config.cfgScale !== undefined) msg.cfg_scale = this.config.cfgScale;
1448
+ if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
1449
+ if (this.config.maxNewTokens !== undefined) msg.max_new_tokens = this.config.maxNewTokens;
1450
+ if (this.config.sampleRate !== undefined) msg.sample_rate = this.config.sampleRate;
1451
+ if (this.config.flushTimeoutMs !== undefined) msg.flush_timeout_ms = this.config.flushTimeoutMs;
1452
+ if (this.config.maxBufferLength !== undefined) msg.max_buffer_length = this.config.maxBufferLength;
1453
+ if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
1454
+ if (this.config.language !== undefined) msg.language = this.config.language;
1455
+ if (this.config.wordTimestamps) msg.word_timestamps = true;
1456
+ if (this.config.autoMode !== undefined) msg.auto_mode = this.config.autoMode;
1457
+ if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
1458
+ if (this.config.speed !== undefined) msg.speed = this.config.speed;
1459
+ this.configSent = true;
1460
+ }
1461
+
1462
+ this.ws.send(JSON.stringify(msg));
1463
+ }
1464
+
1465
+ /**
1466
+ * End the current session but keep the WebSocket connection open.
1467
+ *
1468
+ * This allows starting a new session on the same connection, avoiding
1469
+ * the overhead of a new WebSocket handshake (~200-300ms). After calling
1470
+ * this, optionally call {@link updateConfig} to change voice/model settings,
1471
+ * then call {@link send} to start the next session.
1472
+ *
1473
+ * The returned promise resolves once the server confirms with a
1474
+ * `session_closed` message, or after a 15 s **quiet** timeout — i.e. 15 s
1475
+ * elapse without *any* server message arriving. The timer resets on every
1476
+ * incoming frame so a long final flush that streams audio for tens of
1477
+ * seconds is not truncated; only a genuinely silent server trips the fuse.
1478
+ */
1479
+ endSession(): Promise<void> {
1480
+ if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
1481
+
1482
+ const ws = this.ws;
1483
+ // Quiet timeout: resets on every incoming server message. Trips only when
1484
+ // the server has been silent for this long. The previous wall-clock fuse
1485
+ // (10 s total) silently truncated audio when the final flushed chunk
1486
+ // took longer to generate than the budget — see fix in this commit.
1487
+ const QUIET_TIMEOUT_MS = 15_000;
1488
+
1489
+ return new Promise<void>((resolve) => {
1490
+ let settled = false;
1491
+ let timer: ReturnType<typeof setTimeout>;
1492
+
1493
+ const prevMessage = ws.onmessage;
1494
+ const prevClose = ws.onclose;
1495
+
1496
+ const done = () => {
1497
+ if (settled) return;
1498
+ settled = true;
1499
+ clearTimeout(timer);
1500
+ // Restore the original handlers so subsequent endSession() calls
1501
+ // don't stack wrappers and so the typed-error onclose installed
1502
+ // by connect() remains in effect for the next session.
1503
+ ws.onmessage = prevMessage;
1504
+ ws.onclose = prevClose;
1505
+ this.configSent = false;
1506
+ resolve();
1507
+ };
1508
+
1509
+ const armQuietTimer = () => {
1510
+ clearTimeout(timer);
1511
+ timer = setTimeout(done, QUIET_TIMEOUT_MS);
1512
+ };
1513
+
1514
+ armQuietTimer();
1515
+
1516
+ ws.onmessage = (event: MessageEvent) => {
1517
+ // Reset the quiet timer on EVERY incoming frame — audio chunks for
1518
+ // the final flush count as liveness, not just session_closed.
1519
+ armQuietTimer();
1520
+ if (prevMessage) prevMessage.call(ws, event);
1521
+ try {
1522
+ const raw = typeof event.data === 'string'
1523
+ ? event.data
1524
+ : event.data instanceof Buffer
1525
+ ? event.data.toString()
1526
+ : String(event.data);
1527
+ if (JSON.parse(raw).session_closed) done();
1528
+ } catch { /* ignore parse errors */ }
1529
+ };
1530
+
1531
+ ws.onclose = (event: CloseEvent) => {
1532
+ this.ws = null;
1533
+ if (prevClose) prevClose.call(ws, event);
1534
+ done();
1535
+ };
1536
+
1537
+ ws.send(JSON.stringify({ close: true }));
1538
+ });
1539
+ }
1540
+
1541
+ /**
1542
+ * Update session configuration for the next session.
1543
+ *
1544
+ * Call this after {@link endSession} and before the next {@link send}
1545
+ * to change voice, model, language, or other settings.
1546
+ */
1547
+ updateConfig(config: Partial<StreamConfig>): void {
1548
+ Object.assign(this.config, config);
1549
+ this.configSent = false;
1550
+ }
1551
+
1552
+ /**
1553
+ * Close the session and the WebSocket connection.
1554
+ *
1555
+ * For session reuse without closing the connection, use
1556
+ * {@link endSession} instead.
1557
+ *
1558
+ * The returned promise resolves once the server confirms the close with a
1559
+ * `session_closed` message, or after a 15 s **quiet** timeout (no traffic
1560
+ * from the server in that window). Audio frames from the server-side
1561
+ * final-flush of the still-buffered text are delivered to your callbacks
1562
+ * before this promise resolves, and each frame resets the quiet timer.
1563
+ */
1564
+ async close(): Promise<void> {
1565
+ await this.endSession();
1566
+
1567
+ if (this.ws) {
1568
+ try { this.ws.close(); } catch { /* already closed */ }
1569
+ this.ws = null;
1570
+ }
1571
+ }
1572
+
1573
+ /** Whether the underlying WebSocket is open. */
1574
+ get isConnected(): boolean {
1575
+ return this.ws !== null && this.ws.readyState === WS_OPEN;
1576
+ }
1577
+ }
1578
+
824
1579
  /**
825
1580
  * KugelAudio API client.
826
1581
  *
@@ -834,13 +1589,13 @@ class MultiContextSession {
834
1589
  * // List voices
835
1590
  * const voices = await client.voices.list();
836
1591
  *
837
- * // Generate audio with fast model (1.5B params)
1592
+ * // Generate audio with fast model
838
1593
  * const audio = await client.tts.generate({
839
1594
  * text: 'Hello, world!',
840
1595
  * modelId: 'kugel-1-turbo',
841
1596
  * });
842
1597
  *
843
- * // Generate audio with premium model (7B params)
1598
+ * // Generate audio with premium model
844
1599
  * const audio = await client.tts.generate({
845
1600
  * text: 'Hello, world!',
846
1601
  * modelId: 'kugel-1',
@@ -855,6 +1610,7 @@ export class KugelAudio {
855
1610
  private _apiUrl: string;
856
1611
  private _ttsUrl: string;
857
1612
  private _timeout: number;
1613
+ private _keepalivePingInterval: number | null;
858
1614
 
859
1615
  /** Models resource */
860
1616
  public readonly models: ModelsResource;
@@ -865,17 +1621,37 @@ export class KugelAudio {
865
1621
 
866
1622
  constructor(options: KugelAudioOptions) {
867
1623
  if (!options.apiKey) {
868
- throw new Error('API key is required');
1624
+ throw new ValidationError(
1625
+ 'KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY ' +
1626
+ 'environment variable or pass { apiKey: ... } to the client. ' +
1627
+ 'Get a key at https://app.kugelaudio.com/settings/api-keys.',
1628
+ );
869
1629
  }
870
1630
 
871
- this._apiKey = options.apiKey;
1631
+ const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
1632
+ this._apiKey = cleanKey;
872
1633
  this._isMasterKey = options.isMasterKey || false;
873
1634
  this._isToken = options.isToken || false;
874
1635
  this._orgId = options.orgId;
875
- this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
1636
+
1637
+ if (options.apiUrl) {
1638
+ this._apiUrl = options.apiUrl.replace(/\/$/, '');
1639
+ } else {
1640
+ const effectiveRegion = options.region || detectedRegion || 'eu';
1641
+ if (!(effectiveRegion in REGION_URLS)) {
1642
+ throw new ValidationError(
1643
+ `Invalid region '${effectiveRegion}'. Must be one of: ${Object.keys(REGION_URLS).join(', ')}.`,
1644
+ );
1645
+ }
1646
+ this._apiUrl = REGION_URLS[effectiveRegion as Region];
1647
+ }
1648
+
876
1649
  // If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
877
1650
  this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
878
1651
  this._timeout = options.timeout || 60000;
1652
+ this._keepalivePingInterval = options.keepalivePingInterval !== undefined
1653
+ ? options.keepalivePingInterval
1654
+ : 20000;
879
1655
 
880
1656
  this.models = new ModelsResource(this);
881
1657
  this.voices = new VoicesResource(this);
@@ -929,6 +1705,11 @@ export class KugelAudio {
929
1705
  return this._ttsUrl;
930
1706
  }
931
1707
 
1708
  /**
   * Keepalive ping interval in milliseconds, or `null` when keepalive pings
   * are disabled. Mirrors {@link KugelAudioOptions.keepalivePingInterval};
   * the constructor defaults it to 20000 ms when the option is omitted.
   */
  get keepalivePingInterval(): number | null {
    return this._keepalivePingInterval;
  }
1712
+
932
1713
  /**
933
1714
  * Close the client and release resources.
934
1715
  * This closes any pooled WebSocket connections.
@@ -991,25 +1772,57 @@ export class KugelAudio {
991
1772
 
992
1773
  clearTimeout(timeoutId);
993
1774
 
994
- if (response.status === 401) {
995
- throw new AuthenticationError('Invalid API key');
1775
+ if (!response.ok) {
1776
+ const text = await response.text();
1777
+ throw classifyHttpError(response.status, text, response.headers);
996
1778
  }
997
- if (response.status === 403) {
998
- throw new InsufficientCreditsError('Access denied');
1779
+
1780
+ return await response.json();
1781
+ } catch (error) {
1782
+ clearTimeout(timeoutId);
1783
+ if (error instanceof KugelAudioError) {
1784
+ throw error;
999
1785
  }
1000
- if (response.status === 429) {
1001
- throw new RateLimitError('Rate limit exceeded');
1786
+ if ((error as Error).name === 'AbortError') {
1787
+ throw new ConnectionError(
1788
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
1789
+ );
1002
1790
  }
1791
+ throw new ConnectionError(
1792
+ `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
1793
+ 'Check network connectivity.',
1794
+ );
1795
+ }
1796
+ }
1797
+
1798
+ /**
1799
+ * Make a multipart/form-data request (for file uploads).
1800
+ * @internal Used by VoicesResource for reference file uploads.
1801
+ */
1802
+ async requestMultipart<T>(method: string, path: string, formData: FormData): Promise<T> {
1803
+ const url = `${this._apiUrl}${path}`;
1804
+
1805
+ const headers: Record<string, string> = {
1806
+ 'X-API-Key': this._apiKey,
1807
+ 'Authorization': `Bearer ${this._apiKey}`,
1808
+ };
1809
+
1810
+ const controller = new AbortController();
1811
+ const timeoutId = setTimeout(() => controller.abort(), this._timeout);
1812
+
1813
+ try {
1814
+ const response = await fetch(url, {
1815
+ method,
1816
+ headers,
1817
+ body: formData,
1818
+ signal: controller.signal,
1819
+ });
1820
+
1821
+ clearTimeout(timeoutId);
1822
+
1003
1823
  if (!response.ok) {
1004
1824
  const text = await response.text();
1005
- let message = `HTTP ${response.status}`;
1006
- try {
1007
- const json = JSON.parse(text);
1008
- message = json.detail || json.error || message;
1009
- } catch {
1010
- message = text || message;
1011
- }
1012
- throw new KugelAudioError(message, response.status);
1825
+ throw classifyHttpError(response.status, text, response.headers);
1013
1826
  }
1014
1827
 
1015
1828
  return await response.json();
@@ -1019,9 +1832,14 @@ export class KugelAudio {
1019
1832
  throw error;
1020
1833
  }
1021
1834
  if ((error as Error).name === 'AbortError') {
1022
- throw new KugelAudioError('Request timed out');
1835
+ throw new ConnectionError(
1836
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
1837
+ );
1023
1838
  }
1024
- throw new KugelAudioError(`Request failed: ${(error as Error).message}`);
1839
+ throw new ConnectionError(
1840
+ `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
1841
+ 'Check network connectivity.',
1842
+ );
1025
1843
  }
1026
1844
  }
1027
1845
  }