kugelaudio 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/client.ts CHANGED
@@ -3,26 +3,52 @@
3
3
  */
4
4
 
5
5
  import {
6
- AuthenticationError,
7
- InsufficientCreditsError,
6
+ ConnectionError,
8
7
  KugelAudioError,
9
- RateLimitError,
8
+ ValidationError,
9
+ classifyHttpError,
10
+ classifyWsClose,
11
+ classifyWsFrame,
12
+ classifyWsHandshakeError,
10
13
  } from './errors';
11
14
  import type {
12
15
  AudioChunk,
13
16
  AudioResponse,
17
+ CreateVoiceOptions,
14
18
  GenerateOptions,
15
19
  GenerationStats,
16
20
  KugelAudioOptions,
17
21
  Model,
18
22
  StreamCallbacks,
19
- Voice,
23
+ StreamConfig,
24
+ StreamingSessionCallbacks,
25
+ UpdateVoiceOptions,
26
+ VoiceDetail,
27
+ VoiceListResponse,
28
+ VoiceReference,
20
29
  WordTimestamp
21
30
  } from './types';
22
31
  import { base64ToArrayBuffer } from './utils';
23
32
  import { getWebSocket } from './websocket';
24
33
 
25
- const DEFAULT_API_URL = 'https://api.kugelaudio.com';
34
+ import type { Region } from './types';
35
+
36
+ const REGION_URLS: Record<Region, string> = {
37
+ eu: 'https://api.kugelaudio.com',
38
+ us: 'https://us-api.kugelaudio.com',
39
+ global: 'https://global-api.kugelaudio.com',
40
+ };
41
+
42
+ const REGION_PREFIXES = ['eu-', 'us-', 'global-'] as const;
43
+
44
+ function parseApiKey(apiKey: string): { cleanKey: string; detectedRegion?: Region } {
45
+ for (const prefix of REGION_PREFIXES) {
46
+ if (apiKey.startsWith(prefix)) {
47
+ return { cleanKey: apiKey.slice(prefix.length), detectedRegion: prefix.slice(0, -1) as Region };
48
+ }
49
+ }
50
+ return { cleanKey: apiKey };
51
+ }
26
52
 
27
53
  /**
28
54
  * Create a new WebSocket instance.
@@ -37,6 +63,23 @@ function createWs(url: string): WebSocket {
37
63
  /** WebSocket OPEN readyState constant. */
38
64
  const WS_OPEN = 1;
39
65
 
66
+ let _languageWarningLogged = false;
67
+
68
+ function warnIfNoLanguage(
69
+ language: string | undefined,
70
+ normalize: boolean | undefined
71
+ ): void {
72
+ const normEnabled = normalize === undefined || normalize;
73
+ if (!language && normEnabled && !_languageWarningLogged) {
74
+ _languageWarningLogged = true;
75
+ console.warn(
76
+ "[KugelAudio] No 'language' set with normalization enabled — the server " +
77
+ 'will auto-detect the language, adding ~60-150ms to TTFA. Set language ' +
78
+ "(e.g., language: 'en') for optimal latency."
79
+ );
80
+ }
81
+ }
82
+
40
83
  /**
41
84
  * Models resource for listing TTS models.
42
85
  */
@@ -72,52 +115,211 @@ class VoicesResource {
72
115
  language?: string;
73
116
  includePublic?: boolean;
74
117
  limit?: number;
75
- }): Promise<Voice[]> {
118
+ offset?: number;
119
+ }): Promise<VoiceListResponse> {
76
120
  const params = new URLSearchParams();
77
121
  if (options?.language) params.set('language', options.language);
78
122
  if (options?.includePublic !== undefined) {
79
123
  params.set('include_public', String(options.includePublic));
80
124
  }
81
125
  if (options?.limit) params.set('limit', String(options.limit));
126
+ if (options?.offset) params.set('offset', String(options.offset));
82
127
 
83
128
  const query = params.toString();
84
129
  const path = query ? `/v1/voices?${query}` : '/v1/voices';
85
- const response = await this.client.request<{ voices: any[] }>('GET', path);
130
+ const response = await this.client.request<{ voices: any[]; total: number; limit: number; offset: number }>('GET', path);
86
131
 
87
- return response.voices.map((v) => ({
88
- id: v.id,
89
- name: v.name,
90
- description: v.description,
91
- category: v.category,
92
- sex: v.sex,
93
- age: v.age,
94
- supportedLanguages: v.supported_languages || [],
95
- sampleText: v.sample_text,
96
- avatarUrl: v.avatar_url,
97
- sampleUrl: v.sample_url,
98
- isPublic: v.is_public || false,
99
- verified: v.verified || false,
100
- }));
132
+ return {
133
+ voices: response.voices.map((v) => ({
134
+ id: v.id,
135
+ name: v.name,
136
+ description: v.description,
137
+ category: v.category,
138
+ sex: v.sex,
139
+ age: v.age,
140
+ supportedLanguages: v.supported_languages || [],
141
+ sampleText: v.sample_text,
142
+ avatarUrl: v.avatar_url,
143
+ sampleUrl: v.sample_url,
144
+ isPublic: v.is_public || false,
145
+ verified: v.verified || false,
146
+ })),
147
+ total: response.total,
148
+ limit: response.limit,
149
+ offset: response.offset,
150
+ };
101
151
  }
102
152
 
103
153
  /**
104
154
  * Get a specific voice by ID.
105
155
  */
106
- async get(voiceId: number): Promise<Voice> {
156
+ async get(voiceId: number): Promise<VoiceDetail> {
107
157
  const v = await this.client.request<any>('GET', `/v1/voices/${voiceId}`);
158
+ return this.mapVoiceDetail(v);
159
+ }
160
+
161
+ /**
162
+ * Create a new voice.
163
+ */
164
+ async create(options: CreateVoiceOptions): Promise<VoiceDetail> {
165
+ const metadata = {
166
+ name: options.name,
167
+ sex: options.sex,
168
+ description: options.description ?? '',
169
+ category: options.category ?? 'conversational',
170
+ age: options.age ?? 'middle_age',
171
+ quality: options.quality ?? 'mid',
172
+ supported_languages: options.supportedLanguages ?? ['en'],
173
+ is_public: options.isPublic ?? false,
174
+ sample_text: options.sampleText ?? '',
175
+ };
176
+
177
+ const formData = new FormData();
178
+ formData.append(
179
+ 'metadata',
180
+ new Blob([JSON.stringify(metadata)], { type: 'application/json' }),
181
+ );
182
+
183
+ if (options.referenceFiles) {
184
+ for (const file of options.referenceFiles) {
185
+ formData.append('files', file);
186
+ }
187
+ }
188
+
189
+ const v = await this.client.requestMultipart<any>('POST', '/v1/voices', formData);
190
+ return this.mapVoiceDetail(v);
191
+ }
192
+
193
+ /**
194
+ * Update an existing voice. Only provided fields are updated.
195
+ */
196
+ async update(voiceId: number, options: UpdateVoiceOptions): Promise<VoiceDetail> {
197
+ const payload: Record<string, unknown> = {};
198
+ if (options.name !== undefined) payload.name = options.name;
199
+ if (options.description !== undefined) payload.description = options.description;
200
+ if (options.category !== undefined) payload.category = options.category;
201
+ if (options.age !== undefined) payload.age = options.age;
202
+ if (options.sex !== undefined) payload.sex = options.sex;
203
+ if (options.quality !== undefined) payload.quality = options.quality;
204
+ if (options.supportedLanguages !== undefined) payload.supported_languages = options.supportedLanguages;
205
+ if (options.isPublic !== undefined) payload.is_public = options.isPublic;
206
+ if (options.sampleText !== undefined) payload.sample_text = options.sampleText;
207
+
208
+ const v = await this.client.request<any>('PATCH', `/v1/voices/${voiceId}`, payload);
209
+ return this.mapVoiceDetail(v);
210
+ }
211
+
212
+ /**
213
+ * Delete a voice.
214
+ */
215
+ async delete(voiceId: number): Promise<void> {
216
+ await this.client.request<any>('DELETE', `/v1/voices/${voiceId}`);
217
+ }
218
+
219
+ // -- Reference management --
220
+
221
+ /**
222
+ * List reference audio files for a voice.
223
+ */
224
+ async listReferences(voiceId: number): Promise<VoiceReference[]> {
225
+ const response = await this.client.request<{ references: any[] }>(
226
+ 'GET',
227
+ `/v1/voices/${voiceId}/references`,
228
+ );
229
+ return response.references.map((r) => this.mapVoiceReference(r));
230
+ }
231
+
232
+ /**
233
+ * Upload a reference audio file to a voice.
234
+ *
235
+ * @param voiceId - Voice ID
236
+ * @param file - Audio file (File in browser, Blob in Node.js)
237
+ * @param referenceText - Optional transcript of the reference audio
238
+ */
239
+ async addReference(
240
+ voiceId: number,
241
+ file: File | Blob,
242
+ referenceText?: string,
243
+ ): Promise<VoiceReference> {
244
+ const formData = new FormData();
245
+ formData.append('file', file);
246
+ if (referenceText) {
247
+ formData.append('reference_text', referenceText);
248
+ }
249
+
250
+ const r = await this.client.requestMultipart<any>(
251
+ 'POST',
252
+ `/v1/voices/${voiceId}/references`,
253
+ formData,
254
+ );
255
+ return this.mapVoiceReference(r);
256
+ }
257
+
258
+ /**
259
+ * Delete a reference audio file from a voice.
260
+ */
261
+ async deleteReference(voiceId: number, referenceId: number): Promise<void> {
262
+ await this.client.request<any>(
263
+ 'DELETE',
264
+ `/v1/voices/${voiceId}/references/${referenceId}`,
265
+ );
266
+ }
267
+
268
+ // -- Publishing --
269
+
270
+ /**
271
+ * Request publication of a voice. Sets it as public and marks it
272
+ * as pending verification by an admin.
273
+ */
274
+ async publish(voiceId: number): Promise<VoiceDetail> {
275
+ const v = await this.client.request<any>('POST', `/v1/voices/${voiceId}/publish`);
276
+ return this.mapVoiceDetail(v);
277
+ }
278
+
279
+ // -- Sample generation --
280
+
281
+ /**
282
+ * Trigger sample audio generation for a voice.
283
+ */
284
+ async generateSample(voiceId: number): Promise<VoiceDetail> {
285
+ const v = await this.client.request<any>(
286
+ 'POST',
287
+ `/v1/voices/${voiceId}/generate-sample`,
288
+ );
289
+ return this.mapVoiceDetail(v);
290
+ }
291
+
292
+ // -- Helpers --
293
+
294
+ private mapVoiceDetail(v: any): VoiceDetail {
108
295
  return {
109
296
  id: v.id,
110
297
  name: v.name,
111
- description: v.description,
112
- category: v.category,
113
- sex: v.sex,
298
+ description: v.description ?? '',
299
+ generativeVoiceDescription: v.generative_voice_description ?? '',
300
+ supportedLanguages: v.supported_languages ?? [],
301
+ category: v.category ?? 'cloned',
114
302
  age: v.age,
115
- supportedLanguages: v.supported_languages || [],
116
- sampleText: v.sample_text,
117
- avatarUrl: v.avatar_url,
303
+ sex: v.sex,
304
+ quality: v.quality ?? 'mid',
305
+ isPublic: v.is_public ?? false,
306
+ verified: v.verified ?? false,
307
+ pendingVerification: v.pending_verification ?? false,
118
308
  sampleUrl: v.sample_url,
119
- isPublic: v.is_public || false,
120
- verified: v.verified || false,
309
+ avatarUrl: v.avatar_url,
310
+ sampleText: v.sample_text ?? '',
311
+ };
312
+ }
313
+
314
+ private mapVoiceReference(r: any): VoiceReference {
315
+ return {
316
+ id: r.id,
317
+ voiceId: r.voice_id,
318
+ name: r.name ?? '',
319
+ referenceText: r.reference_text ?? '',
320
+ s3Path: r.s3_path ?? '',
321
+ audioUrl: r.audio_url,
322
+ isGenerated: r.is_generated ?? false,
121
323
  };
122
324
  }
123
325
  }
@@ -135,6 +337,7 @@ class TTSResource {
135
337
  reject: (error: Error) => void;
136
338
  }> = new Map();
137
339
  private requestCounter = 0;
340
+ private keepaliveTimer: ReturnType<typeof setInterval> | null = null;
138
341
 
139
342
  constructor(private client: KugelAudio) {}
140
343
 
@@ -207,6 +410,67 @@ class TTSResource {
207
410
  };
208
411
  }
209
412
 
413
+ /**
414
+ * Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
415
+ *
416
+ * **Node.js only** — this method requires the `stream` built-in module and is
417
+ * intended for server-side integrations such as Vapi custom TTS endpoints,
418
+ * Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
419
+ *
420
+ * Compared to manually wiring `onChunk` to a `Readable`, this method avoids
421
+ * a common race-condition: the stream object is created and returned **before**
422
+ * any chunks arrive, so the caller can safely pipe or attach listeners before
423
+ * the first audio byte is pushed.
424
+ *
425
+ * @example Vapi custom TTS endpoint
426
+ * ```typescript
427
+ * app.post('/synthesize', (req, res) => {
428
+ * res.setHeader('Content-Type', 'audio/pcm');
429
+ * res.setHeader('Transfer-Encoding', 'chunked');
430
+ *
431
+ * const readable = client.tts.toReadable({
432
+ * text: req.body.message.text,
433
+ * modelId: 'kugel-1-turbo',
434
+ * sampleRate: req.body.message.sampleRate,
435
+ * language: 'en',
436
+ * });
437
+ *
438
+ * readable.pipe(res);
439
+ * });
440
+ * ```
441
+ *
442
+ * @param options - TTS generation options (same as `stream()`)
443
+ * @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
444
+ * @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
445
+ */
446
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
447
+ toReadable(options: GenerateOptions, reuseConnection = true): any {
448
+ // Dynamic require keeps browser bundles free of Node.js built-ins.
449
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
450
+ const { Readable } = require('stream') as typeof import('stream');
451
+ const readable = new Readable({ read() {} });
452
+
453
+ this.stream(
454
+ options,
455
+ {
456
+ onChunk: (chunk: AudioChunk) => {
457
+ readable.push(Buffer.from(chunk.audio, 'base64'));
458
+ },
459
+ onFinal: () => {
460
+ readable.push(null);
461
+ },
462
+ onError: (error: Error) => {
463
+ readable.destroy(error);
464
+ },
465
+ },
466
+ reuseConnection
467
+ ).catch((error: Error) => {
468
+ readable.destroy(error);
469
+ });
470
+
471
+ return readable;
472
+ }
473
+
210
474
  /**
211
475
  * Build the WebSocket URL with appropriate auth param.
212
476
  */
@@ -265,11 +529,20 @@ class TTSResource {
265
529
  this.wsConnection = ws;
266
530
  this.wsUrl = url;
267
531
  this.setupMessageHandler(ws);
532
+ this.startKeepalive(ws);
268
533
  resolve(ws);
269
534
  };
270
535
 
271
- ws.onerror = () => {
272
- reject(new KugelAudioError('WebSocket connection error'));
536
+ ws.onerror = (event: unknown) => {
537
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
538
+ const typed = classifyWsHandshakeError(underlying);
539
+ reject(
540
+ typed ??
541
+ new ConnectionError(
542
+ `Could not establish KugelAudio WebSocket connection to ${url}. ` +
543
+ 'Check network connectivity.',
544
+ ),
545
+ );
273
546
  };
274
547
  });
275
548
  }
@@ -293,7 +566,7 @@ class TTSResource {
293
566
  if (!pending) return;
294
567
 
295
568
  if (data.error) {
296
- const error = this.parseError(data.error);
569
+ const error = this.parseError(data);
297
570
  pending.callbacks.onError?.(error);
298
571
  this.pendingRequests.delete(requestId);
299
572
  pending.reject(error);
@@ -307,7 +580,6 @@ class TTSResource {
307
580
  totalSamples: data.total_samples,
308
581
  durationMs: data.dur_ms,
309
582
  generationMs: data.gen_ms,
310
- ttfaMs: data.ttfa_ms,
311
583
  rtf: data.rtf,
312
584
  error: data.error,
313
585
  };
@@ -347,17 +619,25 @@ class TTSResource {
347
619
  };
348
620
 
349
621
  ws.onclose = (event) => {
350
- // Clear connection pool
622
+ // Clear connection pool and keepalive
623
+ this.stopKeepalive();
351
624
  this.wsConnection = null;
352
625
  this.wsUrl = null;
353
626
 
354
- // Reject all pending requests
627
+ // Reject all pending requests with appropriate error types
355
628
  for (const [id, pending] of this.pendingRequests) {
356
629
  pending.callbacks.onClose?.();
357
- if (event.code === 4001) {
358
- pending.reject(new AuthenticationError('Authentication failed'));
359
- } else if (event.code === 4003) {
360
- pending.reject(new InsufficientCreditsError('Insufficient credits'));
630
+ // Only surface server-initiated error close codes; normal closes
631
+ // (1000, 1001) should not reject pending requests with an error.
632
+ if (
633
+ event.code === 4001 ||
634
+ event.code === 4003 ||
635
+ event.code === 4029 ||
636
+ event.code === 4500
637
+ ) {
638
+ const error = classifyWsClose(event.code, event.reason);
639
+ pending.callbacks.onError?.(error);
640
+ pending.reject(error);
361
641
  }
362
642
  this.pendingRequests.delete(id);
363
643
  }
@@ -365,7 +645,9 @@ class TTSResource {
365
645
 
366
646
  ws.onerror = () => {
367
647
  // Reject all pending requests
368
- const error = new KugelAudioError('WebSocket connection error');
648
+ const error = new ConnectionError(
649
+ 'KugelAudio WebSocket connection error. Check network connectivity.',
650
+ );
369
651
  for (const [id, pending] of this.pendingRequests) {
370
652
  pending.callbacks.onError?.(error);
371
653
  pending.reject(error);
@@ -400,6 +682,7 @@ class TTSResource {
400
682
  options: GenerateOptions,
401
683
  callbacks: StreamCallbacks
402
684
  ): Promise<void> {
685
+ warnIfNoLanguage(options.language, options.normalize);
403
686
  const ws = await this.getConnection();
404
687
  const requestId = ++this.requestCounter;
405
688
 
@@ -413,11 +696,14 @@ class TTSResource {
413
696
  model_id: options.modelId || 'kugel-1-turbo',
414
697
  voice_id: options.voiceId,
415
698
  cfg_scale: options.cfgScale ?? 2.0,
699
+ ...(options.temperature !== undefined && { temperature: options.temperature }),
416
700
  max_new_tokens: options.maxNewTokens ?? 2048,
417
701
  sample_rate: options.sampleRate ?? 24000,
418
702
  normalize: options.normalize ?? true,
419
703
  ...(options.language && { language: options.language }),
420
704
  ...(options.wordTimestamps && { word_timestamps: true }),
705
+ ...(options.speed !== undefined && { speed: options.speed }),
706
+ ...(options.projectId !== undefined && { project_id: options.projectId }),
421
707
  }));
422
708
  });
423
709
  }
@@ -429,6 +715,7 @@ class TTSResource {
429
715
  options: GenerateOptions,
430
716
  callbacks: StreamCallbacks
431
717
  ): Promise<void> {
718
+ warnIfNoLanguage(options.language, options.normalize);
432
719
  return new Promise((resolve, reject) => {
433
720
  const url = this.buildWsUrl();
434
721
  const ws = createWs(url);
@@ -446,6 +733,8 @@ class TTSResource {
446
733
  normalize: options.normalize ?? true,
447
734
  ...(options.language && { language: options.language }),
448
735
  ...(options.wordTimestamps && { word_timestamps: true }),
736
+ ...(options.speed !== undefined && { speed: options.speed }),
737
+ ...(options.projectId !== undefined && { project_id: options.projectId }),
449
738
  }));
450
739
  };
451
740
 
@@ -460,7 +749,7 @@ class TTSResource {
460
749
  const data = JSON.parse(messageData);
461
750
 
462
751
  if (data.error) {
463
- const error = this.parseError(data.error);
752
+ const error = this.parseError(data);
464
753
  callbacks.onError?.(error);
465
754
  ws.close();
466
755
  reject(error);
@@ -474,7 +763,6 @@ class TTSResource {
474
763
  totalSamples: data.total_samples,
475
764
  durationMs: data.dur_ms,
476
765
  generationMs: data.gen_ms,
477
- ttfaMs: data.ttfa_ms,
478
766
  rtf: data.rtf,
479
767
  error: data.error,
480
768
  };
@@ -513,27 +801,68 @@ class TTSResource {
513
801
  }
514
802
  };
515
803
 
516
- ws.onerror = () => {
517
- const error = new KugelAudioError('WebSocket connection error');
804
+ ws.onerror = (event: unknown) => {
805
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
806
+ const error =
807
+ classifyWsHandshakeError(underlying) ??
808
+ new ConnectionError(
809
+ 'KugelAudio WebSocket connection error. Check network connectivity.',
810
+ );
518
811
  callbacks.onError?.(error);
519
812
  reject(error);
520
813
  };
521
814
 
522
815
  ws.onclose = (event) => {
523
816
  callbacks.onClose?.();
524
- if (event.code === 4001) {
525
- reject(new AuthenticationError('Authentication failed'));
526
- } else if (event.code === 4003) {
527
- reject(new InsufficientCreditsError('Insufficient credits'));
817
+ if (
818
+ event.code === 4001 ||
819
+ event.code === 4003 ||
820
+ event.code === 4029 ||
821
+ event.code === 4500
822
+ ) {
823
+ const error = classifyWsClose(event.code, event.reason);
824
+ callbacks.onError?.(error);
825
+ reject(error);
528
826
  }
529
827
  };
530
828
  });
531
829
  }
532
830
 
831
+ /**
832
+ * Start periodic keepalive pings on the pooled connection.
833
+ * Uses the ws package's ping() in Node.js; silently skips in browsers
834
+ * where WebSocket doesn't expose a ping method.
835
+ */
836
+ private startKeepalive(ws: WebSocket): void {
837
+ this.stopKeepalive();
838
+ const intervalMs = this.client.keepalivePingInterval;
839
+ if (intervalMs == null || intervalMs <= 0) return;
840
+
841
+ this.keepaliveTimer = setInterval(() => {
842
+ if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
843
+ this.stopKeepalive();
844
+ return;
845
+ }
846
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
847
+ if (typeof (ws as any).ping === 'function') {
848
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
849
+ (ws as any).ping();
850
+ }
851
+ }, intervalMs);
852
+ }
853
+
854
+ private stopKeepalive(): void {
855
+ if (this.keepaliveTimer !== null) {
856
+ clearInterval(this.keepaliveTimer);
857
+ this.keepaliveTimer = null;
858
+ }
859
+ }
860
+
533
861
  /**
534
862
  * Close the pooled WebSocket connection.
535
863
  */
536
864
  close(): void {
865
+ this.stopKeepalive();
537
866
  if (this.wsConnection) {
538
867
  try {
539
868
  this.wsConnection.close();
@@ -545,15 +874,43 @@ class TTSResource {
545
874
  }
546
875
  }
547
876
 
548
- private parseError(message: string): Error {
549
- const lower = message.toLowerCase();
550
- if (lower.includes('auth') || lower.includes('unauthorized')) {
551
- return new AuthenticationError(message);
552
- }
553
- if (lower.includes('credit')) {
554
- return new InsufficientCreditsError(message);
555
- }
556
- return new KugelAudioError(message);
877
+ private parseError(data: { error?: string; error_code?: string; retry_after?: number }): Error {
878
+ return classifyWsFrame(data);
879
+ }
880
+
881
+ /**
882
+ * Create a streaming session for LLM integration.
883
+ *
884
+ * The session connects to `/ws/tts/stream` and keeps a persistent
885
+ * connection across multiple {@link StreamingSession.send} calls.
886
+ * The server auto-chunks text at sentence boundaries — no client-side
887
+ * flushing required.
888
+ *
889
+ * @param config - Session configuration (voice, model, chunking strategy).
890
+ * @param callbacks - Callbacks for audio chunks and session lifecycle events.
891
+ * @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
892
+ *
893
+ * @example
894
+ * ```typescript
895
+ * const session = client.tts.streamingSession(
896
+ * { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
897
+ * { onChunk: (chunk) => playAudio(chunk.audio) },
898
+ * );
899
+ *
900
+ * session.connect();
901
+ *
902
+ * for await (const token of llmStream) {
903
+ * session.send(token);
904
+ * }
905
+ *
906
+ * await session.close();
907
+ * ```
908
+ */
909
+ streamingSession(
910
+ config: StreamConfig,
911
+ callbacks: StreamingSessionCallbacks
912
+ ): StreamingSession {
913
+ return new StreamingSession(this.client, config, callbacks);
557
914
  }
558
915
 
559
916
  /**
@@ -574,7 +931,7 @@ class TTSResource {
574
931
  * console.log(`Audio from ${chunk.contextId}`);
575
932
  * playAudio(chunk.audio);
576
933
  * },
577
- * onContextFinal: (contextId) => {
934
+ * onContextClosed: (contextId) => {
578
935
  * console.log(`${contextId} finished`);
579
936
  * },
580
937
  * });
@@ -625,8 +982,13 @@ class MultiContextSession {
625
982
 
626
983
  /**
627
984
  * Connect to the multi-context WebSocket endpoint.
985
+ *
986
+ * The returned promise resolves once the WebSocket is OPEN so callers can
987
+ * ``await session.connect(callbacks)`` before invoking
988
+ * {@link createContext} / {@link send}. Pre-open errors reject with the
989
+ * typed error.
628
990
  */
629
- connect(callbacks: import('./types').MultiContextCallbacks): void {
991
+ connect(callbacks: import('./types').MultiContextCallbacks): Promise<void> {
630
992
  this.callbacks = callbacks;
631
993
 
632
994
  const wsUrl = this.client.ttsUrl
@@ -644,12 +1006,9 @@ class MultiContextSession {
644
1006
 
645
1007
  const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
646
1008
  this.ws = createWs(url);
1009
+ const ws = this.ws;
647
1010
 
648
- this.ws.onopen = () => {
649
- // Connection established, ready to create contexts
650
- };
651
-
652
- this.ws.onmessage = (event: { data: unknown }) => {
1011
+ ws.onmessage = (event: { data: unknown }) => {
653
1012
  try {
654
1013
  // Handle both browser (string) and Node.js (Buffer) message formats
655
1014
  const messageData = typeof event.data === 'string'
@@ -690,10 +1049,6 @@ class MultiContextSession {
690
1049
  this.callbacks.onChunk?.(chunk);
691
1050
  }
692
1051
 
693
- if (data.is_final) {
694
- this.callbacks.onContextFinal?.(data.context_id);
695
- }
696
-
697
1052
  if (data.context_closed) {
698
1053
  this.contexts.delete(data.context_id);
699
1054
  this.callbacks.onContextClosed?.(data.context_id);
@@ -712,20 +1067,51 @@ class MultiContextSession {
712
1067
  }
713
1068
  };
714
1069
 
715
- this.ws.onerror = () => {
716
- this.callbacks.onError?.(new KugelAudioError('WebSocket connection error'));
717
- };
1070
+ return new Promise<void>((resolve, reject) => {
1071
+ let opened = false;
718
1072
 
719
- this.ws.onclose = (event) => {
720
- if (event.code === 4001) {
721
- this.callbacks.onError?.(new AuthenticationError('Authentication failed'));
722
- } else if (event.code === 4003) {
723
- this.callbacks.onError?.(new InsufficientCreditsError('Insufficient credits'));
724
- }
725
- this.ws = null;
726
- this.isStarted = false;
727
- this.contexts.clear();
728
- };
1073
+ ws.onopen = () => {
1074
+ opened = true;
1075
+ resolve();
1076
+ };
1077
+
1078
+ ws.onerror = (event: unknown) => {
1079
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
1080
+ const err =
1081
+ classifyWsHandshakeError(underlying) ??
1082
+ new ConnectionError(
1083
+ 'KugelAudio multi-context WebSocket connection error. ' +
1084
+ 'Check network connectivity.',
1085
+ );
1086
+ if (!opened) reject(err);
1087
+ this.callbacks.onError?.(err);
1088
+ };
1089
+
1090
+ ws.onclose = (event) => {
1091
+ let typedErr: KugelAudioError | null = null;
1092
+ if (
1093
+ event.code === 4001 ||
1094
+ event.code === 4003 ||
1095
+ event.code === 4029 ||
1096
+ event.code === 4500
1097
+ ) {
1098
+ typedErr = classifyWsClose(event.code, event.reason);
1099
+ this.callbacks.onError?.(typedErr);
1100
+ }
1101
+ if (!opened) {
1102
+ reject(
1103
+ typedErr ??
1104
+ new ConnectionError(
1105
+ `KugelAudio multi-context WebSocket closed before ready ` +
1106
+ `(code ${event.code}).`,
1107
+ ),
1108
+ );
1109
+ }
1110
+ this.ws = null;
1111
+ this.isStarted = false;
1112
+ this.contexts.clear();
1113
+ };
1114
+ });
729
1115
  }
730
1116
 
731
1117
  /**
@@ -749,10 +1135,13 @@ class MultiContextSession {
749
1135
 
750
1136
  // Include session config on first context
751
1137
  if (!this.isStarted) {
1138
+ warnIfNoLanguage(this.config.language, this.config.normalize);
752
1139
  if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
753
1140
  if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
1141
+ if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
754
1142
  if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
755
1143
  if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
1144
+ if (this.config.language) msg.language = this.config.language;
756
1145
  if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
757
1146
  }
758
1147
 
@@ -857,6 +1246,336 @@ class MultiContextSession {
857
1246
  }
858
1247
  }
859
1248
 
1249
/**
 * Streaming session for LLM integration via `/ws/tts/stream`.
 *
 * The server accumulates text across multiple {@link send} calls and
 * auto-chunks it at sentence boundaries, keeping the KV cache warm between
 * chunks for natural prosody. You never need to call `flush` explicitly —
 * configure {@link StreamConfig.chunkLengthSchedule} or
 * {@link StreamConfig.autoMode} instead.
 *
 * @example
 * ```typescript
 * const session = client.tts.streamingSession({
 *   voiceId: 123,
 *   autoMode: true,
 *   chunkLengthSchedule: [50, 100, 150, 250],
 * }, {
 *   onChunk: (chunk) => playAudio(chunk.audio),
 *   onSessionClosed: (totalSecs) => console.log(`Done: ${totalSecs}s`),
 * });
 *
 * session.connect();
 *
 * for await (const token of llmStream) {
 *   session.send(token);
 * }
 *
 * await session.close();
 * ```
 */
class StreamingSession {
  // Live socket while connected; null before connect() and after close.
  private ws: WebSocket | null = null;
  private config: StreamConfig;
  private callbacks: StreamingSessionCallbacks;
  private client: KugelAudio;
  // True once the session config has been piggybacked onto a send();
  // reset on disconnect/endSession so the next session re-sends it.
  private configSent = false;

  constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks) {
    this.client = client;
    this.config = config;
    this.callbacks = callbacks;
  }

  /**
   * Open the WebSocket connection and authenticate.
   *
   * The returned promise resolves once the WebSocket is OPEN, so callers can
   * ``await session.connect()`` and then ``send()`` without racing the
   * handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
   * the promise with the typed error.
   */
  connect(): Promise<void> {
    // Derive the WS endpoint from the HTTP TTS base URL.
    const wsUrl = this.client.ttsUrl
      .replace('https://', 'wss://')
      .replace('http://', 'ws://');

    // Pick the query-param name matching the credential kind.
    let authParam: string;
    if (this.client.isToken) {
      authParam = 'token';
    } else if (this.client.isMasterKey) {
      authParam = 'master_key';
    } else {
      authParam = 'api_key';
    }

    // NOTE(review): the credential is embedded unencoded in the query string —
    // confirm keys/tokens are always URL-safe (no '&', '=', '#', etc.).
    const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
    this.ws = createWs(url);
    const ws = this.ws;

    ws.onmessage = (event: { data: unknown }) => {
      try {
        // Normalize browser (string) and Node 'ws' (Buffer) payloads to text.
        const messageData = typeof event.data === 'string'
          ? event.data
          : event.data instanceof Buffer
            ? event.data.toString()
            : String(event.data);
        const data = JSON.parse(messageData);

        if (data.error) {
          this.callbacks.onError?.(new KugelAudioError(data.error));
          return;
        }

        // A single frame may carry several of the fields below, so these are
        // independent `if`s rather than an if/else chain.
        if (data.audio) {
          const chunk: AudioChunk = {
            audio: data.audio,
            encoding: data.enc || 'pcm_s16le',
            index: data.idx,
            sampleRate: data.sr,
            samples: data.samples,
          };
          this.callbacks.onChunk?.(chunk);
        }

        if (data.word_timestamps) {
          // Map snake_case wire fields to the SDK's camelCase WordTimestamp shape.
          const timestamps = data.word_timestamps.map((w: Record<string, unknown>) => ({
            word: w.word as string,
            startMs: w.start_ms as number,
            endMs: w.end_ms as number,
            charStart: w.char_start as number,
            charEnd: w.char_end as number,
            score: (w.score as number) ?? 1.0,
          }));
          this.callbacks.onWordTimestamps?.(timestamps);
        }

        if (data.chunk_complete) {
          this.callbacks.onChunkComplete?.(
            data.chunk_id ?? 0,
            data.audio_seconds ?? 0,
            data.gen_ms ?? 0,
          );
        }

        if (data.generation_started) {
          this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
        }

        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(
            data.total_audio_seconds ?? 0,
            data.total_text_chunks ?? 0,
            data.total_audio_chunks ?? 0,
          );
        }
      } catch (e) {
        console.error('[KugelAudio] Failed to parse streaming session message:', e);
      }
    };

    return new Promise<void>((resolve, reject) => {
      // Guards double-settlement: pre-open errors/closes reject, post-open
      // ones only go to callbacks.onError.
      let opened = false;

      ws.onopen = () => {
        opened = true;
        resolve();
      };

      ws.onerror = (event: unknown) => {
        // Node 'ws' wraps the real error in event.error; browsers pass a bare Event.
        const underlying = (event as { error?: unknown } | null)?.error ?? event;
        const err =
          classifyWsHandshakeError(underlying) ??
          new ConnectionError(
            'KugelAudio streaming WebSocket connection error. ' +
            'Check network connectivity.',
          );
        if (!opened) reject(err);
        this.callbacks.onError?.(err);
      };

      ws.onclose = (event) => {
        let typedErr: KugelAudioError | null = null;
        // Application close codes: 4001 unauthorized, 4003 forbidden,
        // 4029 rate limited, 4500 server error — presumably matching
        // classifyWsClose; other codes are treated as ordinary closes.
        if (
          event.code === 4001 ||
          event.code === 4003 ||
          event.code === 4029 ||
          event.code === 4500
        ) {
          typedErr = classifyWsClose(event.code, event.reason);
          this.callbacks.onError?.(typedErr);
        }
        if (!opened) {
          reject(
            typedErr ??
            new ConnectionError(
              `KugelAudio streaming WebSocket closed before ready ` +
              `(code ${event.code}).`,
            ),
          );
        }
        // Drop connection state so isConnected turns false and the next
        // send() on a new connection re-sends the session config.
        this.ws = null;
        this.configSent = false;
      };
    });
  }

  /**
   * Send a text chunk to the server (e.g. one LLM output token).
   *
   * The server buffers text across multiple calls and starts generating at
   * natural sentence boundaries automatically — no need to call `flush`.
   *
   * @param text - Raw text or LLM token to append to the server buffer.
   * @param flush - Force immediate generation of whatever is buffered.
   *   **Avoid calling this per-sentence from the client.** Doing so bypasses
   *   the server's semantic chunking, incurs a fresh model prefill cost on
   *   every flush, and makes latency *worse*, not better. Let the server
   *   handle chunking via `chunkLengthSchedule` / `autoMode` instead.
   * @throws KugelAudioError if the socket is not open (call connect() first).
   */
  send(text: string, flush = false): void {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError('StreamingSession not connected. Call connect() first.');
    }

    const msg: Record<string, unknown> = { text, flush };

    // First message of a session carries the full config (snake_case wire
    // names); subsequent messages send text only.
    if (!this.configSent) {
      if (this.config.voiceId !== undefined) msg.voice_id = this.config.voiceId;
      if (this.config.modelId !== undefined) msg.model_id = this.config.modelId;
      if (this.config.cfgScale !== undefined) msg.cfg_scale = this.config.cfgScale;
      if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
      if (this.config.maxNewTokens !== undefined) msg.max_new_tokens = this.config.maxNewTokens;
      if (this.config.sampleRate !== undefined) msg.sample_rate = this.config.sampleRate;
      if (this.config.flushTimeoutMs !== undefined) msg.flush_timeout_ms = this.config.flushTimeoutMs;
      if (this.config.maxBufferLength !== undefined) msg.max_buffer_length = this.config.maxBufferLength;
      if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
      if (this.config.language !== undefined) msg.language = this.config.language;
      if (this.config.wordTimestamps) msg.word_timestamps = true;
      if (this.config.autoMode !== undefined) msg.auto_mode = this.config.autoMode;
      if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
      if (this.config.speed !== undefined) msg.speed = this.config.speed;
      this.configSent = true;
    }

    this.ws.send(JSON.stringify(msg));
  }

  /**
   * End the current session but keep the WebSocket connection open.
   *
   * This allows starting a new session on the same connection, avoiding
   * the overhead of a new WebSocket handshake (~200-300ms). After calling
   * this, optionally call {@link updateConfig} to change voice/model settings,
   * then call {@link send} to start the next session.
   *
   * The returned promise resolves once the server confirms with a
   * `session_closed` message, or after a 15 s **quiet** timeout — i.e. 15 s
   * elapse without *any* server message arriving. The timer resets on every
   * incoming frame so a long final flush that streams audio for tens of
   * seconds is not truncated; only a genuinely silent server trips the fuse.
   */
  endSession(): Promise<void> {
    // Nothing to end if we never connected or the socket already dropped.
    if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();

    const ws = this.ws;
    // Quiet timeout: resets on every incoming server message. Trips only when
    // the server has been silent for this long. The previous wall-clock fuse
    // (10 s total) silently truncated audio when the final flushed chunk
    // took longer to generate than the budget — see fix in this commit.
    const QUIET_TIMEOUT_MS = 15_000;

    return new Promise<void>((resolve) => {
      let settled = false;
      let timer: ReturnType<typeof setTimeout>;

      // Save the handlers installed by connect() so they can be restored.
      const prevMessage = ws.onmessage;
      const prevClose = ws.onclose;

      const done = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        // Restore the original handlers so subsequent endSession() calls
        // don't stack wrappers and so the typed-error onclose installed
        // by connect() remains in effect for the next session.
        ws.onmessage = prevMessage;
        ws.onclose = prevClose;
        this.configSent = false;
        resolve();
      };

      const armQuietTimer = () => {
        clearTimeout(timer);
        timer = setTimeout(done, QUIET_TIMEOUT_MS);
      };

      armQuietTimer();

      // Wrap (not replace) onmessage: callbacks still fire via prevMessage,
      // while this wrapper watches for the session_closed confirmation.
      ws.onmessage = (event: MessageEvent) => {
        // Reset the quiet timer on EVERY incoming frame — audio chunks for
        // the final flush count as liveness, not just session_closed.
        armQuietTimer();
        if (prevMessage) prevMessage.call(ws, event);
        try {
          const raw = typeof event.data === 'string'
            ? event.data
            : event.data instanceof Buffer
              ? event.data.toString()
              : String(event.data);
          if (JSON.parse(raw).session_closed) done();
        } catch { /* ignore parse errors */ }
      };

      ws.onclose = (event: CloseEvent) => {
        this.ws = null;
        if (prevClose) prevClose.call(ws, event);
        done();
      };

      ws.send(JSON.stringify({ close: true }));
    });
  }

  /**
   * Update session configuration for the next session.
   *
   * Call this after {@link endSession} and before the next {@link send}
   * to change voice, model, language, or other settings.
   */
  updateConfig(config: Partial<StreamConfig>): void {
    // Shallow-merge into the existing config and force a config re-send.
    Object.assign(this.config, config);
    this.configSent = false;
  }

  /**
   * Close the session and the WebSocket connection.
   *
   * For session reuse without closing the connection, use
   * {@link endSession} instead.
   *
   * The returned promise resolves once the server confirms the close with a
   * `session_closed` message, or after a 15 s **quiet** timeout (no traffic
   * from the server in that window). Audio frames from the server-side
   * final-flush of the still-buffered text are delivered to your callbacks
   * before this promise resolves, and each frame resets the quiet timer.
   */
  async close(): Promise<void> {
    await this.endSession();

    // endSession() may have left the socket open (session reuse path);
    // tear it down for good here.
    if (this.ws) {
      try { this.ws.close(); } catch { /* already closed */ }
      this.ws = null;
    }
  }

  /** Whether the underlying WebSocket is open. */
  get isConnected(): boolean {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
}
1578
+
860
1579
  /**
861
1580
  * KugelAudio API client.
862
1581
  *
@@ -870,13 +1589,13 @@ class MultiContextSession {
870
1589
  * // List voices
871
1590
  * const voices = await client.voices.list();
872
1591
  *
873
- * // Generate audio with fast model (1.5B params)
1592
+ * // Generate audio with fast model
874
1593
  * const audio = await client.tts.generate({
875
1594
  * text: 'Hello, world!',
876
1595
  * modelId: 'kugel-1-turbo',
877
1596
  * });
878
1597
  *
879
- * // Generate audio with premium model (7B params)
1598
+ * // Generate audio with premium model
880
1599
  * const audio = await client.tts.generate({
881
1600
  * text: 'Hello, world!',
882
1601
  * modelId: 'kugel-1',
@@ -891,6 +1610,7 @@ export class KugelAudio {
891
1610
  private _apiUrl: string;
892
1611
  private _ttsUrl: string;
893
1612
  private _timeout: number;
1613
+ private _keepalivePingInterval: number | null;
894
1614
 
895
1615
  /** Models resource */
896
1616
  public readonly models: ModelsResource;
@@ -901,17 +1621,37 @@ export class KugelAudio {
901
1621
 
902
1622
  constructor(options: KugelAudioOptions) {
903
1623
  if (!options.apiKey) {
904
- throw new Error('API key is required');
1624
+ throw new ValidationError(
1625
+ 'KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY ' +
1626
+ 'environment variable or pass { apiKey: ... } to the client. ' +
1627
+ 'Get a key at https://app.kugelaudio.com/settings/api-keys.',
1628
+ );
905
1629
  }
906
1630
 
907
- this._apiKey = options.apiKey;
1631
+ const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
1632
+ this._apiKey = cleanKey;
908
1633
  this._isMasterKey = options.isMasterKey || false;
909
1634
  this._isToken = options.isToken || false;
910
1635
  this._orgId = options.orgId;
911
- this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
1636
+
1637
+ if (options.apiUrl) {
1638
+ this._apiUrl = options.apiUrl.replace(/\/$/, '');
1639
+ } else {
1640
+ const effectiveRegion = options.region || detectedRegion || 'eu';
1641
+ if (!(effectiveRegion in REGION_URLS)) {
1642
+ throw new ValidationError(
1643
+ `Invalid region '${effectiveRegion}'. Must be one of: ${Object.keys(REGION_URLS).join(', ')}.`,
1644
+ );
1645
+ }
1646
+ this._apiUrl = REGION_URLS[effectiveRegion as Region];
1647
+ }
1648
+
912
1649
  // If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
913
1650
  this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
914
1651
  this._timeout = options.timeout || 60000;
1652
+ this._keepalivePingInterval = options.keepalivePingInterval !== undefined
1653
+ ? options.keepalivePingInterval
1654
+ : 20000;
915
1655
 
916
1656
  this.models = new ModelsResource(this);
917
1657
  this.voices = new VoicesResource(this);
@@ -965,6 +1705,11 @@ export class KugelAudio {
965
1705
  return this._ttsUrl;
966
1706
  }
967
1707
 
1708
+ /** Get keepalive ping interval in milliseconds, or null if disabled. */
1709
+ get keepalivePingInterval(): number | null {
1710
+ return this._keepalivePingInterval;
1711
+ }
1712
+
968
1713
  /**
969
1714
  * Close the client and release resources.
970
1715
  * This closes any pooled WebSocket connections.
@@ -1027,25 +1772,57 @@ export class KugelAudio {
1027
1772
 
1028
1773
  clearTimeout(timeoutId);
1029
1774
 
1030
- if (response.status === 401) {
1031
- throw new AuthenticationError('Invalid API key');
1775
+ if (!response.ok) {
1776
+ const text = await response.text();
1777
+ throw classifyHttpError(response.status, text, response.headers);
1032
1778
  }
1033
- if (response.status === 403) {
1034
- throw new InsufficientCreditsError('Access denied');
1779
+
1780
+ return await response.json();
1781
+ } catch (error) {
1782
+ clearTimeout(timeoutId);
1783
+ if (error instanceof KugelAudioError) {
1784
+ throw error;
1035
1785
  }
1036
- if (response.status === 429) {
1037
- throw new RateLimitError('Rate limit exceeded');
1786
+ if ((error as Error).name === 'AbortError') {
1787
+ throw new ConnectionError(
1788
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
1789
+ );
1038
1790
  }
1791
+ throw new ConnectionError(
1792
+ `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
1793
+ 'Check network connectivity.',
1794
+ );
1795
+ }
1796
+ }
1797
+
1798
+ /**
1799
+ * Make a multipart/form-data request (for file uploads).
1800
+ * @internal Used by VoicesResource for reference file uploads.
1801
+ */
1802
+ async requestMultipart<T>(method: string, path: string, formData: FormData): Promise<T> {
1803
+ const url = `${this._apiUrl}${path}`;
1804
+
1805
+ const headers: Record<string, string> = {
1806
+ 'X-API-Key': this._apiKey,
1807
+ 'Authorization': `Bearer ${this._apiKey}`,
1808
+ };
1809
+
1810
+ const controller = new AbortController();
1811
+ const timeoutId = setTimeout(() => controller.abort(), this._timeout);
1812
+
1813
+ try {
1814
+ const response = await fetch(url, {
1815
+ method,
1816
+ headers,
1817
+ body: formData,
1818
+ signal: controller.signal,
1819
+ });
1820
+
1821
+ clearTimeout(timeoutId);
1822
+
1039
1823
  if (!response.ok) {
1040
1824
  const text = await response.text();
1041
- let message = `HTTP ${response.status}`;
1042
- try {
1043
- const json = JSON.parse(text);
1044
- message = json.detail || json.error || message;
1045
- } catch {
1046
- message = text || message;
1047
- }
1048
- throw new KugelAudioError(message, response.status);
1825
+ throw classifyHttpError(response.status, text, response.headers);
1049
1826
  }
1050
1827
 
1051
1828
  return await response.json();
@@ -1055,9 +1832,14 @@ export class KugelAudio {
1055
1832
  throw error;
1056
1833
  }
1057
1834
  if ((error as Error).name === 'AbortError') {
1058
- throw new KugelAudioError('Request timed out');
1835
+ throw new ConnectionError(
1836
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
1837
+ );
1059
1838
  }
1060
- throw new KugelAudioError(`Request failed: ${(error as Error).message}`);
1839
+ throw new ConnectionError(
1840
+ `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
1841
+ 'Check network connectivity.',
1842
+ );
1061
1843
  }
1062
1844
  }
1063
1845
  }