kugelaudio 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/client.ts CHANGED
@@ -3,26 +3,50 @@
3
3
  */
4
4
 
5
5
  import {
6
- AuthenticationError,
7
- InsufficientCreditsError,
6
+ ConnectionError,
8
7
  KugelAudioError,
9
- RateLimitError,
8
+ ValidationError,
9
+ classifyHttpError,
10
+ classifyWsClose,
11
+ classifyWsFrame,
12
+ classifyWsHandshakeError,
10
13
  } from './errors';
11
14
  import type {
12
15
  AudioChunk,
13
16
  AudioResponse,
17
+ CreateVoiceOptions,
14
18
  GenerateOptions,
15
19
  GenerationStats,
16
20
  KugelAudioOptions,
17
21
  Model,
18
22
  StreamCallbacks,
19
- Voice,
23
+ StreamConfig,
24
+ StreamingSessionCallbacks,
25
+ UpdateVoiceOptions,
26
+ VoiceDetail,
27
+ VoiceListResponse,
28
+ VoiceReference,
20
29
  WordTimestamp
21
30
  } from './types';
22
31
  import { base64ToArrayBuffer } from './utils';
23
32
  import { getWebSocket } from './websocket';
24
33
 
34
+ import type { Region } from './types';
35
+
25
36
  const DEFAULT_API_URL = 'https://api.kugelaudio.com';
37
+ const EU_API_URL = 'https://api.eu.kugelaudio.com';
38
+ const SUPPORTED_REGIONS = ['eu', 'us', 'global'] as const;
39
+
40
const REGION_PREFIXES = ['eu-', 'us-', 'global-'] as const;

/**
 * Split an optional region prefix off an API key.
 *
 * The key is compared against each entry of `REGION_PREFIXES` in order. On the
 * first match the prefix is removed from the key and the prefix without its
 * trailing dash is reported as the detected region. A key without a known
 * prefix is returned unchanged and no region is reported.
 *
 * @param apiKey - API key as supplied by the caller, possibly region-prefixed.
 * @returns The key without its region prefix, plus the region when one matched.
 */
function parseApiKey(apiKey: string): { cleanKey: string; detectedRegion?: Region } {
  const matched = REGION_PREFIXES.find((candidate) => apiKey.startsWith(candidate));
  if (matched === undefined) {
    return { cleanKey: apiKey };
  }

  // Drop the trailing '-' from the prefix to obtain the region name.
  const regionName = matched.substring(0, matched.length - 1);
  return {
    cleanKey: apiKey.substring(matched.length),
    detectedRegion: regionName as Region,
  };
}
26
50
 
27
51
  /**
28
52
  * Create a new WebSocket instance.
@@ -37,6 +61,23 @@ function createWs(url: string): WebSocket {
37
61
  /** WebSocket OPEN readyState constant. */
38
62
  const WS_OPEN = 1;
39
63
 
64
// Module-wide latch: the missing-language warning is printed at most once.
let _languageWarningLogged = false;

/**
 * Emit a one-time console warning when a request has no language set while
 * text normalization is enabled.
 *
 * Normalization counts as enabled when `normalize` is `undefined` or truthy.
 *
 * @param language - Language from the request options, if any.
 * @param normalize - Normalization flag from the request options, if any.
 */
function warnIfNoLanguage(
  language: string | undefined,
  normalize: boolean | undefined
): void {
  if (_languageWarningLogged) return;
  if (language) return;

  const normalizationActive = normalize === undefined ? true : normalize;
  if (!normalizationActive) return;

  _languageWarningLogged = true;
  const message =
    "[KugelAudio] No 'language' set with normalization enabled — the server " +
    'will auto-detect the language, adding ~60-150ms to TTFA. Set language ' +
    "(e.g., language: 'en') for optimal latency.";
  console.warn(message);
}
80
+
40
81
  /**
41
82
  * Models resource for listing TTS models.
42
83
  */
@@ -72,52 +113,211 @@ class VoicesResource {
72
113
  language?: string;
73
114
  includePublic?: boolean;
74
115
  limit?: number;
75
- }): Promise<Voice[]> {
116
+ offset?: number;
117
+ }): Promise<VoiceListResponse> {
76
118
  const params = new URLSearchParams();
77
119
  if (options?.language) params.set('language', options.language);
78
120
  if (options?.includePublic !== undefined) {
79
121
  params.set('include_public', String(options.includePublic));
80
122
  }
81
123
  if (options?.limit) params.set('limit', String(options.limit));
124
+ if (options?.offset) params.set('offset', String(options.offset));
82
125
 
83
126
  const query = params.toString();
84
127
  const path = query ? `/v1/voices?${query}` : '/v1/voices';
85
- const response = await this.client.request<{ voices: any[] }>('GET', path);
128
+ const response = await this.client.request<{ voices: any[]; total: number; limit: number; offset: number }>('GET', path);
86
129
 
87
- return response.voices.map((v) => ({
88
- id: v.id,
89
- name: v.name,
90
- description: v.description,
91
- category: v.category,
92
- sex: v.sex,
93
- age: v.age,
94
- supportedLanguages: v.supported_languages || [],
95
- sampleText: v.sample_text,
96
- avatarUrl: v.avatar_url,
97
- sampleUrl: v.sample_url,
98
- isPublic: v.is_public || false,
99
- verified: v.verified || false,
100
- }));
130
+ return {
131
+ voices: response.voices.map((v) => ({
132
+ id: v.id,
133
+ name: v.name,
134
+ description: v.description,
135
+ category: v.category,
136
+ sex: v.sex,
137
+ age: v.age,
138
+ supportedLanguages: v.supported_languages || [],
139
+ sampleText: v.sample_text,
140
+ avatarUrl: v.avatar_url,
141
+ sampleUrl: v.sample_url,
142
+ isPublic: v.is_public || false,
143
+ verified: v.verified || false,
144
+ })),
145
+ total: response.total,
146
+ limit: response.limit,
147
+ offset: response.offset,
148
+ };
101
149
  }
102
150
 
103
151
  /**
104
152
  * Get a specific voice by ID.
105
153
  */
106
- async get(voiceId: number): Promise<Voice> {
154
+ async get(voiceId: number): Promise<VoiceDetail> {
107
155
  const v = await this.client.request<any>('GET', `/v1/voices/${voiceId}`);
156
+ return this.mapVoiceDetail(v);
157
+ }
158
+
159
/**
 * Create a new voice.
 *
 * Sends a multipart POST to `/v1/voices` containing a JSON `metadata` part and
 * one `files` part per entry of `options.referenceFiles`. Optional fields the
 * caller leaves out are filled in here: empty description and sample text,
 * category `'conversational'`, age `'middle_age'`, quality `'mid'`,
 * languages `['en']`, and not public.
 *
 * @param options - Voice attributes and optional reference audio files.
 * @returns The server response mapped through `mapVoiceDetail`.
 */
async create(options: CreateVoiceOptions): Promise<VoiceDetail> {
  const {
    name,
    sex,
    description,
    category,
    age,
    quality,
    supportedLanguages,
    isPublic,
    sampleText,
    referenceFiles,
  } = options;

  // Key order is kept stable so the serialized metadata is unchanged.
  const metadataJson = JSON.stringify({
    name,
    sex,
    description: description ?? '',
    category: category ?? 'conversational',
    age: age ?? 'middle_age',
    quality: quality ?? 'mid',
    supported_languages: supportedLanguages ?? ['en'],
    is_public: isPublic ?? false,
    sample_text: sampleText ?? '',
  });

  const body = new FormData();
  body.append('metadata', new Blob([metadataJson], { type: 'application/json' }));
  for (const audio of referenceFiles || []) {
    body.append('files', audio);
  }

  const created = await this.client.requestMultipart<any>('POST', '/v1/voices', body);
  return this.mapVoiceDetail(created);
}
190
+
191
/**
 * Update an existing voice via PATCH `/v1/voices/{voiceId}`.
 *
 * Only fields that are not `undefined` in `options` are included in the
 * request body; camelCase option names are translated to the snake_case
 * keys used on the wire.
 *
 * @param voiceId - ID of the voice to update.
 * @param options - Fields to change.
 * @returns The server response mapped through `mapVoiceDetail`.
 */
async update(voiceId: number, options: UpdateVoiceOptions): Promise<VoiceDetail> {
  // Wire key paired with the caller-supplied value, in request-body order.
  const candidates: Array<[string, unknown]> = [
    ['name', options.name],
    ['description', options.description],
    ['category', options.category],
    ['age', options.age],
    ['sex', options.sex],
    ['quality', options.quality],
    ['supported_languages', options.supportedLanguages],
    ['is_public', options.isPublic],
    ['sample_text', options.sampleText],
  ];

  const payload: Record<string, unknown> = {};
  for (const [wireKey, value] of candidates) {
    if (value !== undefined) {
      payload[wireKey] = value;
    }
  }

  const updated = await this.client.request<any>('PATCH', `/v1/voices/${voiceId}`, payload);
  return this.mapVoiceDetail(updated);
}
209
+
210
/**
 * Delete a voice via DELETE `/v1/voices/{voiceId}`.
 *
 * @param voiceId - ID of the voice to delete.
 * @returns A promise that settles when the request completes; the response
 *   body is discarded.
 */
async delete(voiceId: number): Promise<void> {
  const path = `/v1/voices/${voiceId}`;
  await this.client.request<any>('DELETE', path);
}
216
+
217
+ // -- Reference management --
218
+
219
/**
 * List the reference audio files attached to a voice.
 *
 * Issues GET `/v1/voices/{voiceId}/references` and converts every entry of
 * the response's `references` array with `mapVoiceReference`.
 *
 * @param voiceId - ID of the voice whose references are listed.
 * @returns The mapped references, in the order the server returned them.
 */
async listReferences(voiceId: number): Promise<VoiceReference[]> {
  const path = `/v1/voices/${voiceId}/references`;
  const { references } = await this.client.request<{ references: any[] }>('GET', path);
  return references.map((raw) => this.mapVoiceReference(raw));
}
229
+
230
/**
 * Upload a reference audio file to a voice.
 *
 * Sends a multipart POST to `/v1/voices/{voiceId}/references` with the audio
 * in the `file` part. A non-empty transcript is added as `reference_text`.
 *
 * @param voiceId - Voice ID
 * @param file - Audio file (File in browser, Blob in Node.js)
 * @param referenceText - Optional transcript of the reference audio
 * @returns The created reference mapped through `mapVoiceReference`.
 */
async addReference(
  voiceId: number,
  file: File | Blob,
  referenceText?: string,
): Promise<VoiceReference> {
  const upload = new FormData();
  upload.append('file', file);
  if (referenceText) upload.append('reference_text', referenceText);

  const endpoint = `/v1/voices/${voiceId}/references`;
  const created = await this.client.requestMultipart<any>('POST', endpoint, upload);
  return this.mapVoiceReference(created);
}
255
+
256
/**
 * Delete one reference audio file from a voice via
 * DELETE `/v1/voices/{voiceId}/references/{referenceId}`.
 *
 * @param voiceId - ID of the voice that owns the reference.
 * @param referenceId - ID of the reference to remove.
 * @returns A promise that settles when the request completes; the response
 *   body is discarded.
 */
async deleteReference(voiceId: number, referenceId: number): Promise<void> {
  const endpoint = `/v1/voices/${voiceId}/references/${referenceId}`;
  await this.client.request<any>('DELETE', endpoint);
}
265
+
266
+ // -- Publishing --
267
+
268
/**
 * Request publication of a voice via POST `/v1/voices/{voiceId}/publish`.
 *
 * According to the API description, this sets the voice as public and marks
 * it as pending verification by an admin.
 *
 * @param voiceId - ID of the voice to publish.
 * @returns The server response mapped through `mapVoiceDetail`.
 */
async publish(voiceId: number): Promise<VoiceDetail> {
  const endpoint = `/v1/voices/${voiceId}/publish`;
  const published = await this.client.request<any>('POST', endpoint);
  return this.mapVoiceDetail(published);
}
276
+
277
+ // -- Sample generation --
278
+
279
/**
 * Trigger sample audio generation for a voice via
 * POST `/v1/voices/{voiceId}/generate-sample`.
 *
 * @param voiceId - ID of the voice to generate a sample for.
 * @returns The server response mapped through `mapVoiceDetail`.
 */
async generateSample(voiceId: number): Promise<VoiceDetail> {
  const endpoint = `/v1/voices/${voiceId}/generate-sample`;
  const refreshed = await this.client.request<any>('POST', endpoint);
  return this.mapVoiceDetail(refreshed);
}
289
+
290
+ // -- Helpers --
291
+
292
+ private mapVoiceDetail(v: any): VoiceDetail {
108
293
  return {
109
294
  id: v.id,
110
295
  name: v.name,
111
- description: v.description,
112
- category: v.category,
113
- sex: v.sex,
296
+ description: v.description ?? '',
297
+ generativeVoiceDescription: v.generative_voice_description ?? '',
298
+ supportedLanguages: v.supported_languages ?? [],
299
+ category: v.category ?? 'cloned',
114
300
  age: v.age,
115
- supportedLanguages: v.supported_languages || [],
116
- sampleText: v.sample_text,
117
- avatarUrl: v.avatar_url,
301
+ sex: v.sex,
302
+ quality: v.quality ?? 'mid',
303
+ isPublic: v.is_public ?? false,
304
+ verified: v.verified ?? false,
305
+ pendingVerification: v.pending_verification ?? false,
118
306
  sampleUrl: v.sample_url,
119
- isPublic: v.is_public || false,
120
- verified: v.verified || false,
307
+ avatarUrl: v.avatar_url,
308
+ sampleText: v.sample_text ?? '',
309
+ };
310
+ }
311
+
312
/**
 * Convert a raw snake_case reference object from the API into a
 * `VoiceReference`.
 *
 * Missing (`null`/`undefined`) text fields become empty strings and a missing
 * `is_generated` becomes `false`; `id`, `voice_id` and `audio_url` are passed
 * through as received.
 */
private mapVoiceReference(r: any): VoiceReference {
  const {
    id,
    voice_id: voiceId,
    name,
    reference_text: referenceText,
    s3_path: s3Path,
    audio_url: audioUrl,
    is_generated: isGenerated,
  } = r;

  return {
    id,
    voiceId,
    name: name ?? '',
    referenceText: referenceText ?? '',
    s3Path: s3Path ?? '',
    audioUrl,
    isGenerated: isGenerated ?? false,
  };
}
123
323
  }
@@ -135,6 +335,7 @@ class TTSResource {
135
335
  reject: (error: Error) => void;
136
336
  }> = new Map();
137
337
  private requestCounter = 0;
338
+ private keepaliveTimer: ReturnType<typeof setInterval> | null = null;
138
339
 
139
340
  constructor(private client: KugelAudio) {}
140
341
 
@@ -207,6 +408,67 @@ class TTSResource {
207
408
  };
208
409
  }
209
410
 
411
/**
 * Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
 *
 * **Node.js only** — this method requires the `stream` built-in module and is
 * intended for server-side integrations such as Vapi custom TTS endpoints,
 * Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
 *
 * The stream object is created and returned **before** any chunks arrive, so
 * the caller can safely pipe or attach listeners before the first audio byte
 * is pushed.
 *
 * Lifecycle guarantees:
 * - The readable ends when `onFinal` fires, or when the underlying `stream()`
 *   promise resolves without `onFinal` having fired, so consumers never hang.
 * - The readable is destroyed with the error on the first failure only.
 * - Chunks that arrive after the readable has ended or been destroyed (for
 *   example because the consumer disconnected) are dropped.
 *
 * @example Vapi custom TTS endpoint
 * ```typescript
 * app.post('/synthesize', (req, res) => {
 *   res.setHeader('Content-Type', 'audio/pcm');
 *   res.setHeader('Transfer-Encoding', 'chunked');
 *
 *   const readable = client.tts.toReadable({
 *     text: req.body.message.text,
 *     modelId: 'kugel-1-turbo',
 *     sampleRate: req.body.message.sampleRate,
 *     language: 'en',
 *   });
 *
 *   readable.pipe(res);
 * });
 * ```
 *
 * @param options - TTS generation options (same as `stream()`)
 * @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
 * @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
toReadable(options: GenerateOptions, reuseConnection = true): any {
  // Dynamic require keeps browser bundles free of Node.js built-ins.
  // eslint-disable-next-line @typescript-eslint/no-require-imports
  const { Readable } = require('stream') as typeof import('stream');
  const readable = new Readable({ read() {} });

  // Set once the readable has been ended or destroyed by this method; pushing
  // after that point would raise a push-after-EOF error.
  let finished = false;
  const isClosed = (): boolean => finished || readable.destroyed;

  const end = (): void => {
    if (isClosed()) return;
    finished = true;
    readable.push(null);
  };

  const fail = (error: Error): void => {
    if (isClosed()) return;
    finished = true;
    readable.destroy(error);
  };

  this.stream(
    options,
    {
      onChunk: (chunk: AudioChunk) => {
        if (isClosed()) return;
        readable.push(Buffer.from(chunk.audio, 'base64'));
      },
      onFinal: () => {
        end();
      },
      onError: (error: Error) => {
        fail(error);
      },
    },
    reuseConnection
  ).then(
    // Safety net: if the request settled without a final frame, still end
    // the readable so piped consumers are released.
    () => {
      end();
    },
    (error: Error) => {
      fail(error);
    }
  );

  return readable;
}
471
+
210
472
  /**
211
473
  * Build the WebSocket URL with appropriate auth param.
212
474
  */
@@ -265,11 +527,20 @@ class TTSResource {
265
527
  this.wsConnection = ws;
266
528
  this.wsUrl = url;
267
529
  this.setupMessageHandler(ws);
530
+ this.startKeepalive(ws);
268
531
  resolve(ws);
269
532
  };
270
533
 
271
- ws.onerror = () => {
272
- reject(new KugelAudioError('WebSocket connection error'));
534
+ ws.onerror = (event: unknown) => {
535
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
536
+ const typed = classifyWsHandshakeError(underlying);
537
+ reject(
538
+ typed ??
539
+ new ConnectionError(
540
+ `Could not establish KugelAudio WebSocket connection to ${url}. ` +
541
+ 'Check network connectivity.',
542
+ ),
543
+ );
273
544
  };
274
545
  });
275
546
  }
@@ -293,7 +564,7 @@ class TTSResource {
293
564
  if (!pending) return;
294
565
 
295
566
  if (data.error) {
296
- const error = this.parseError(data.error);
567
+ const error = this.parseError(data);
297
568
  pending.callbacks.onError?.(error);
298
569
  this.pendingRequests.delete(requestId);
299
570
  pending.reject(error);
@@ -307,7 +578,6 @@ class TTSResource {
307
578
  totalSamples: data.total_samples,
308
579
  durationMs: data.dur_ms,
309
580
  generationMs: data.gen_ms,
310
- ttfaMs: data.ttfa_ms,
311
581
  rtf: data.rtf,
312
582
  error: data.error,
313
583
  };
@@ -347,17 +617,25 @@ class TTSResource {
347
617
  };
348
618
 
349
619
  ws.onclose = (event) => {
350
- // Clear connection pool
620
+ // Clear connection pool and keepalive
621
+ this.stopKeepalive();
351
622
  this.wsConnection = null;
352
623
  this.wsUrl = null;
353
624
 
354
- // Reject all pending requests
625
+ // Reject all pending requests with appropriate error types
355
626
  for (const [id, pending] of this.pendingRequests) {
356
627
  pending.callbacks.onClose?.();
357
- if (event.code === 4001) {
358
- pending.reject(new AuthenticationError('Authentication failed'));
359
- } else if (event.code === 4003) {
360
- pending.reject(new InsufficientCreditsError('Insufficient credits'));
628
+ // Only surface server-initiated error close codes; normal closes
629
+ // (1000, 1001) should not reject pending requests with an error.
630
+ if (
631
+ event.code === 4001 ||
632
+ event.code === 4003 ||
633
+ event.code === 4029 ||
634
+ event.code === 4500
635
+ ) {
636
+ const error = classifyWsClose(event.code, event.reason);
637
+ pending.callbacks.onError?.(error);
638
+ pending.reject(error);
361
639
  }
362
640
  this.pendingRequests.delete(id);
363
641
  }
@@ -365,7 +643,9 @@ class TTSResource {
365
643
 
366
644
  ws.onerror = () => {
367
645
  // Reject all pending requests
368
- const error = new KugelAudioError('WebSocket connection error');
646
+ const error = new ConnectionError(
647
+ 'KugelAudio WebSocket connection error. Check network connectivity.',
648
+ );
369
649
  for (const [id, pending] of this.pendingRequests) {
370
650
  pending.callbacks.onError?.(error);
371
651
  pending.reject(error);
@@ -400,6 +680,7 @@ class TTSResource {
400
680
  options: GenerateOptions,
401
681
  callbacks: StreamCallbacks
402
682
  ): Promise<void> {
683
+ warnIfNoLanguage(options.language, options.normalize);
403
684
  const ws = await this.getConnection();
404
685
  const requestId = ++this.requestCounter;
405
686
 
@@ -413,11 +694,14 @@ class TTSResource {
413
694
  model_id: options.modelId || 'kugel-1-turbo',
414
695
  voice_id: options.voiceId,
415
696
  cfg_scale: options.cfgScale ?? 2.0,
697
+ ...(options.temperature !== undefined && { temperature: options.temperature }),
416
698
  max_new_tokens: options.maxNewTokens ?? 2048,
417
699
  sample_rate: options.sampleRate ?? 24000,
418
700
  normalize: options.normalize ?? true,
419
701
  ...(options.language && { language: options.language }),
420
702
  ...(options.wordTimestamps && { word_timestamps: true }),
703
+ ...(options.speed !== undefined && { speed: options.speed }),
704
+ ...(options.projectId !== undefined && { project_id: options.projectId }),
421
705
  }));
422
706
  });
423
707
  }
@@ -429,6 +713,7 @@ class TTSResource {
429
713
  options: GenerateOptions,
430
714
  callbacks: StreamCallbacks
431
715
  ): Promise<void> {
716
+ warnIfNoLanguage(options.language, options.normalize);
432
717
  return new Promise((resolve, reject) => {
433
718
  const url = this.buildWsUrl();
434
719
  const ws = createWs(url);
@@ -446,6 +731,8 @@ class TTSResource {
446
731
  normalize: options.normalize ?? true,
447
732
  ...(options.language && { language: options.language }),
448
733
  ...(options.wordTimestamps && { word_timestamps: true }),
734
+ ...(options.speed !== undefined && { speed: options.speed }),
735
+ ...(options.projectId !== undefined && { project_id: options.projectId }),
449
736
  }));
450
737
  };
451
738
 
@@ -460,7 +747,7 @@ class TTSResource {
460
747
  const data = JSON.parse(messageData);
461
748
 
462
749
  if (data.error) {
463
- const error = this.parseError(data.error);
750
+ const error = this.parseError(data);
464
751
  callbacks.onError?.(error);
465
752
  ws.close();
466
753
  reject(error);
@@ -474,7 +761,6 @@ class TTSResource {
474
761
  totalSamples: data.total_samples,
475
762
  durationMs: data.dur_ms,
476
763
  generationMs: data.gen_ms,
477
- ttfaMs: data.ttfa_ms,
478
764
  rtf: data.rtf,
479
765
  error: data.error,
480
766
  };
@@ -513,27 +799,68 @@ class TTSResource {
513
799
  }
514
800
  };
515
801
 
516
- ws.onerror = () => {
517
- const error = new KugelAudioError('WebSocket connection error');
802
+ ws.onerror = (event: unknown) => {
803
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
804
+ const error =
805
+ classifyWsHandshakeError(underlying) ??
806
+ new ConnectionError(
807
+ 'KugelAudio WebSocket connection error. Check network connectivity.',
808
+ );
518
809
  callbacks.onError?.(error);
519
810
  reject(error);
520
811
  };
521
812
 
522
813
  ws.onclose = (event) => {
523
814
  callbacks.onClose?.();
524
- if (event.code === 4001) {
525
- reject(new AuthenticationError('Authentication failed'));
526
- } else if (event.code === 4003) {
527
- reject(new InsufficientCreditsError('Insufficient credits'));
815
+ if (
816
+ event.code === 4001 ||
817
+ event.code === 4003 ||
818
+ event.code === 4029 ||
819
+ event.code === 4500
820
+ ) {
821
+ const error = classifyWsClose(event.code, event.reason);
822
+ callbacks.onError?.(error);
823
+ reject(error);
528
824
  }
529
825
  };
530
826
  });
531
827
  }
532
828
 
829
/**
 * Start periodic keepalive pings on the pooled connection.
 *
 * Any previously running keepalive timer is stopped first. Nothing is
 * scheduled when the client's `keepalivePingInterval` is null/undefined or
 * not positive. On each tick the timer stops itself if `ws` is no longer the
 * pooled connection or is not OPEN; otherwise it calls `ws.ping()` when such
 * a method exists (the `ws` package in Node.js) and silently does nothing in
 * browsers, where WebSocket doesn't expose a ping method.
 *
 * @param ws - The pooled WebSocket to keep alive.
 */
private startKeepalive(ws: WebSocket): void {
  this.stopKeepalive();

  const period = this.client.keepalivePingInterval;
  if (period == null) return;
  if (period <= 0) return;

  const tick = (): void => {
    const stillPooled = this.wsConnection === ws && ws.readyState === WS_OPEN;
    if (!stillPooled) {
      this.stopKeepalive();
      return;
    }
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const maybePing = (ws as any).ping;
    if (typeof maybePing === 'function') {
      maybePing.call(ws);
    }
  };

  this.keepaliveTimer = setInterval(tick, period);
}
851
+
852
/**
 * Stop the keepalive timer, if one is running, and clear the stored handle.
 * Safe to call when no timer is active.
 */
private stopKeepalive(): void {
  const timer = this.keepaliveTimer;
  if (timer === null) return;

  clearInterval(timer);
  this.keepaliveTimer = null;
}
858
+
533
859
  /**
534
860
  * Close the pooled WebSocket connection.
535
861
  */
536
862
  close(): void {
863
+ this.stopKeepalive();
537
864
  if (this.wsConnection) {
538
865
  try {
539
866
  this.wsConnection.close();
@@ -545,15 +872,43 @@ class TTSResource {
545
872
  }
546
873
  }
547
874
 
548
- private parseError(message: string): Error {
549
- const lower = message.toLowerCase();
550
- if (lower.includes('auth') || lower.includes('unauthorized')) {
551
- return new AuthenticationError(message);
552
- }
553
- if (lower.includes('credit')) {
554
- return new InsufficientCreditsError(message);
555
- }
556
- return new KugelAudioError(message);
875
/**
 * Turn an error frame received over the WebSocket into an Error instance.
 *
 * Delegates entirely to `classifyWsFrame` from `./errors`, passing the whole
 * frame so the classifier can see `error`, `error_code` and `retry_after`.
 *
 * @param data - Parsed server frame carrying the error fields.
 * @returns The error produced by `classifyWsFrame`.
 */
private parseError(data: { error?: string; error_code?: string; retry_after?: number }): Error {
  const classified = classifyWsFrame(data);
  return classified;
}
878
+
879
/**
 * Create a streaming session for LLM integration.
 *
 * The session connects to `/ws/tts/stream` and keeps a persistent
 * connection across multiple {@link StreamingSession.send} calls.
 * The server auto-chunks text at sentence boundaries — no client-side
 * flushing required.
 *
 * This method only constructs the session; no connection is opened until
 * `connect()` is called. `connect()` returns a promise that resolves once the
 * WebSocket is open and rejects on pre-open errors, so await it before
 * sending text.
 *
 * @param config - Session configuration (voice, model, chunking strategy).
 * @param callbacks - Callbacks for audio chunks and session lifecycle events.
 * @returns A {@link StreamingSession} instance. Await `.connect()` before sending.
 *
 * @example
 * ```typescript
 * const session = client.tts.streamingSession(
 *   { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
 *   { onChunk: (chunk) => playAudio(chunk.audio) },
 * );
 *
 * await session.connect();
 *
 * for await (const token of llmStream) {
 *   session.send(token);
 * }
 *
 * await session.close();
 * ```
 */
streamingSession(
  config: StreamConfig,
  callbacks: StreamingSessionCallbacks
): StreamingSession {
  return new StreamingSession(this.client, config, callbacks);
}
558
913
 
559
914
  /**
@@ -574,7 +929,7 @@ class TTSResource {
574
929
  * console.log(`Audio from ${chunk.contextId}`);
575
930
  * playAudio(chunk.audio);
576
931
  * },
577
- * onContextFinal: (contextId) => {
932
+ * onContextClosed: (contextId) => {
578
933
  * console.log(`${contextId} finished`);
579
934
  * },
580
935
  * });
@@ -625,8 +980,13 @@ class MultiContextSession {
625
980
 
626
981
  /**
627
982
  * Connect to the multi-context WebSocket endpoint.
983
+ *
984
+ * The returned promise resolves once the WebSocket is OPEN so callers can
985
+ * ``await session.connect(callbacks)`` before invoking
986
+ * {@link createContext} / {@link send}. Pre-open errors reject with the
987
+ * typed error.
628
988
  */
629
- connect(callbacks: import('./types').MultiContextCallbacks): void {
989
+ connect(callbacks: import('./types').MultiContextCallbacks): Promise<void> {
630
990
  this.callbacks = callbacks;
631
991
 
632
992
  const wsUrl = this.client.ttsUrl
@@ -644,12 +1004,9 @@ class MultiContextSession {
644
1004
 
645
1005
  const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
646
1006
  this.ws = createWs(url);
1007
+ const ws = this.ws;
647
1008
 
648
- this.ws.onopen = () => {
649
- // Connection established, ready to create contexts
650
- };
651
-
652
- this.ws.onmessage = (event: { data: unknown }) => {
1009
+ ws.onmessage = (event: { data: unknown }) => {
653
1010
  try {
654
1011
  // Handle both browser (string) and Node.js (Buffer) message formats
655
1012
  const messageData = typeof event.data === 'string'
@@ -690,10 +1047,6 @@ class MultiContextSession {
690
1047
  this.callbacks.onChunk?.(chunk);
691
1048
  }
692
1049
 
693
- if (data.is_final) {
694
- this.callbacks.onContextFinal?.(data.context_id);
695
- }
696
-
697
1050
  if (data.context_closed) {
698
1051
  this.contexts.delete(data.context_id);
699
1052
  this.callbacks.onContextClosed?.(data.context_id);
@@ -712,20 +1065,51 @@ class MultiContextSession {
712
1065
  }
713
1066
  };
714
1067
 
715
- this.ws.onerror = () => {
716
- this.callbacks.onError?.(new KugelAudioError('WebSocket connection error'));
717
- };
1068
+ return new Promise<void>((resolve, reject) => {
1069
+ let opened = false;
718
1070
 
719
- this.ws.onclose = (event) => {
720
- if (event.code === 4001) {
721
- this.callbacks.onError?.(new AuthenticationError('Authentication failed'));
722
- } else if (event.code === 4003) {
723
- this.callbacks.onError?.(new InsufficientCreditsError('Insufficient credits'));
724
- }
725
- this.ws = null;
726
- this.isStarted = false;
727
- this.contexts.clear();
728
- };
1071
+ ws.onopen = () => {
1072
+ opened = true;
1073
+ resolve();
1074
+ };
1075
+
1076
+ ws.onerror = (event: unknown) => {
1077
+ const underlying = (event as { error?: unknown } | null)?.error ?? event;
1078
+ const err =
1079
+ classifyWsHandshakeError(underlying) ??
1080
+ new ConnectionError(
1081
+ 'KugelAudio multi-context WebSocket connection error. ' +
1082
+ 'Check network connectivity.',
1083
+ );
1084
+ if (!opened) reject(err);
1085
+ this.callbacks.onError?.(err);
1086
+ };
1087
+
1088
+ ws.onclose = (event) => {
1089
+ let typedErr: KugelAudioError | null = null;
1090
+ if (
1091
+ event.code === 4001 ||
1092
+ event.code === 4003 ||
1093
+ event.code === 4029 ||
1094
+ event.code === 4500
1095
+ ) {
1096
+ typedErr = classifyWsClose(event.code, event.reason);
1097
+ this.callbacks.onError?.(typedErr);
1098
+ }
1099
+ if (!opened) {
1100
+ reject(
1101
+ typedErr ??
1102
+ new ConnectionError(
1103
+ `KugelAudio multi-context WebSocket closed before ready ` +
1104
+ `(code ${event.code}).`,
1105
+ ),
1106
+ );
1107
+ }
1108
+ this.ws = null;
1109
+ this.isStarted = false;
1110
+ this.contexts.clear();
1111
+ };
1112
+ });
729
1113
  }
730
1114
 
731
1115
  /**
@@ -749,10 +1133,13 @@ class MultiContextSession {
749
1133
 
750
1134
  // Include session config on first context
751
1135
  if (!this.isStarted) {
1136
+ warnIfNoLanguage(this.config.language, this.config.normalize);
752
1137
  if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
753
1138
  if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
1139
+ if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
754
1140
  if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
755
1141
  if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
1142
+ if (this.config.language) msg.language = this.config.language;
756
1143
  if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
757
1144
  }
758
1145
 
@@ -857,6 +1244,336 @@ class MultiContextSession {
857
1244
  }
858
1245
  }
859
1246
 
1247
/**
 * Streaming session for LLM integration via `/ws/tts/stream`.
 *
 * The server accumulates text across multiple {@link send} calls and
 * auto-chunks it at sentence boundaries, keeping the KV cache warm between
 * chunks for natural prosody. You never need to call `flush` explicitly —
 * configure {@link StreamConfig.chunkLengthSchedule} or
 * {@link StreamConfig.autoMode} instead.
 *
 * @example
 * ```typescript
 * const session = client.tts.streamingSession({
 *   voiceId: 123,
 *   autoMode: true,
 *   chunkLengthSchedule: [50, 100, 150, 250],
 * }, {
 *   onChunk: (chunk) => playAudio(chunk.audio),
 *   onSessionClosed: (totalSecs) => console.log(`Done: ${totalSecs}s`),
 * });
 *
 * // Wait for the socket to be OPEN: send() throws while it is still connecting.
 * await session.connect();
 *
 * for await (const token of llmStream) {
 *   session.send(token);
 * }
 *
 * await session.close();
 * ```
 */
class StreamingSession {
  private ws: WebSocket | null = null;
  private config: StreamConfig;
  private callbacks: StreamingSessionCallbacks;
  private client: KugelAudio;
  /** True once the session config has been attached to an outgoing frame. */
  private configSent = false;
  /** In-flight {@link endSession} promise, shared by overlapping callers. */
  private ending: Promise<void> | null = null;

  /**
   * @param client - Client supplying the TTS URL and credentials.
   * @param config - Initial session configuration. It is shallow-copied, so
   *   later {@link updateConfig} calls never mutate the caller's object.
   * @param callbacks - Handlers invoked for server frames and errors.
   */
  constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks) {
    this.client = client;
    this.config = { ...config };
    this.callbacks = callbacks;
  }

  /**
   * Convert a raw WebSocket payload to text.
   *
   * `Buffer` only exists under Node, so the `instanceof` check is guarded to
   * avoid a ReferenceError in runtimes without that global.
   */
  private static decodeFrame(data: unknown): string {
    if (typeof data === 'string') return data;
    if (typeof Buffer !== 'undefined' && data instanceof Buffer) return data.toString();
    return String(data);
  }

  /**
   * Parse one server frame and dispatch it to the matching callbacks.
   *
   * A frame carrying `error` is reported through `onError` and nothing else
   * in that frame is processed. Anything thrown while parsing or inside a
   * callback is caught and logged rather than propagated to the socket.
   */
  private handleFrame(raw: unknown): void {
    try {
      const data = JSON.parse(StreamingSession.decodeFrame(raw));

      if (data.error) {
        this.callbacks.onError?.(new KugelAudioError(data.error));
        return;
      }

      if (data.audio) {
        const chunk: AudioChunk = {
          audio: data.audio,
          encoding: data.enc || 'pcm_s16le',
          index: data.idx,
          sampleRate: data.sr,
          samples: data.samples,
        };
        this.callbacks.onChunk?.(chunk);
      }

      if (data.word_timestamps) {
        const timestamps = data.word_timestamps.map((w: Record<string, unknown>) => ({
          word: w.word as string,
          startMs: w.start_ms as number,
          endMs: w.end_ms as number,
          charStart: w.char_start as number,
          charEnd: w.char_end as number,
          score: (w.score as number) ?? 1.0,
        }));
        this.callbacks.onWordTimestamps?.(timestamps);
      }

      if (data.chunk_complete) {
        this.callbacks.onChunkComplete?.(
          data.chunk_id ?? 0,
          data.audio_seconds ?? 0,
          data.gen_ms ?? 0,
        );
      }

      if (data.generation_started) {
        this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
      }

      if (data.session_closed) {
        this.callbacks.onSessionClosed?.(
          data.total_audio_seconds ?? 0,
          data.total_text_chunks ?? 0,
          data.total_audio_chunks ?? 0,
        );
      }
    } catch (e) {
      console.error('[KugelAudio] Failed to parse streaming session message:', e);
    }
  }

  /**
   * Open the WebSocket connection and authenticate.
   *
   * The returned promise resolves once the WebSocket is OPEN, so callers can
   * `await session.connect()` and then `send()` without racing the
   * handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
   * the promise with the typed error. Errors are also reported through the
   * `onError` callback, both before and after the socket opens.
   */
  connect(): Promise<void> {
    const wsUrl = this.client.ttsUrl
      .replace('https://', 'wss://')
      .replace('http://', 'ws://');

    let authParam: string;
    if (this.client.isToken) {
      authParam = 'token';
    } else if (this.client.isMasterKey) {
      authParam = 'master_key';
    } else {
      authParam = 'api_key';
    }

    // Percent-encode the credential so characters such as '+', '&' or '='
    // cannot corrupt the query string.
    const credential = encodeURIComponent(this.client.apiKey);
    const url = `${wsUrl}/ws/tts/stream?${authParam}=${credential}`;
    const ws = createWs(url);
    this.ws = ws;

    ws.onmessage = (event: { data: unknown }) => {
      this.handleFrame(event.data);
    };

    return new Promise<void>((resolve, reject) => {
      let opened = false;

      ws.onopen = () => {
        opened = true;
        resolve();
      };

      ws.onerror = (event: unknown) => {
        const underlying = (event as { error?: unknown } | null)?.error ?? event;
        const err =
          classifyWsHandshakeError(underlying) ??
          new ConnectionError(
            'KugelAudio streaming WebSocket connection error. ' +
              'Check network connectivity.',
          );
        if (!opened) reject(err);
        this.callbacks.onError?.(err);
      };

      ws.onclose = (event) => {
        let typedErr: KugelAudioError | null = null;
        if (
          event.code === 4001 ||
          event.code === 4003 ||
          event.code === 4029 ||
          event.code === 4500
        ) {
          typedErr = classifyWsClose(event.code, event.reason);
          this.callbacks.onError?.(typedErr);
        }
        if (!opened) {
          reject(
            typedErr ??
              new ConnectionError(
                `KugelAudio streaming WebSocket closed before ready ` +
                  `(code ${event.code}).`,
              ),
          );
        }
        // Only reset session state if this socket is still the active one; a
        // late close from a socket replaced by a newer connect() must not
        // wipe the newer connection.
        const superseded = this.ws !== null && this.ws !== ws;
        if (!superseded) {
          this.ws = null;
          this.configSent = false;
        }
      };
    });
  }

  /**
   * Send a text chunk to the server (e.g. one LLM output token).
   *
   * The server buffers text across multiple calls and starts generating at
   * natural sentence boundaries automatically — no need to call `flush`.
   * The first frame of each session also carries the session configuration.
   *
   * @param text - Raw text or LLM token to append to the server buffer.
   * @param flush - Force immediate generation of whatever is buffered.
   *   **Avoid calling this per-sentence from the client.** Doing so bypasses
   *   the server's semantic chunking, incurs a fresh model prefill cost on
   *   every flush, and makes latency *worse*, not better. Let the server
   *   handle chunking via `chunkLengthSchedule` / `autoMode` instead.
   * @throws KugelAudioError if the WebSocket is not open.
   */
  send(text: string, flush = false): void {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError('StreamingSession not connected. Call connect() first.');
    }

    const msg: Record<string, unknown> = { text, flush };

    const includeConfig = !this.configSent;
    if (includeConfig) {
      if (this.config.voiceId !== undefined) msg.voice_id = this.config.voiceId;
      if (this.config.modelId !== undefined) msg.model_id = this.config.modelId;
      if (this.config.cfgScale !== undefined) msg.cfg_scale = this.config.cfgScale;
      if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
      if (this.config.maxNewTokens !== undefined) msg.max_new_tokens = this.config.maxNewTokens;
      if (this.config.sampleRate !== undefined) msg.sample_rate = this.config.sampleRate;
      if (this.config.flushTimeoutMs !== undefined) msg.flush_timeout_ms = this.config.flushTimeoutMs;
      if (this.config.maxBufferLength !== undefined) msg.max_buffer_length = this.config.maxBufferLength;
      if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
      if (this.config.language !== undefined) msg.language = this.config.language;
      if (this.config.wordTimestamps) msg.word_timestamps = true;
      if (this.config.autoMode !== undefined) msg.auto_mode = this.config.autoMode;
      if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
      if (this.config.speed !== undefined) msg.speed = this.config.speed;
    }

    this.ws.send(JSON.stringify(msg));

    // Mark the config as delivered only after the frame was handed to the
    // socket, so a throwing send() does not silently drop the configuration.
    if (includeConfig) this.configSent = true;
  }

  /**
   * End the current session but keep the WebSocket connection open.
   *
   * This allows starting a new session on the same connection, avoiding
   * the overhead of a new WebSocket handshake (~200-300ms). After calling
   * this, optionally call {@link updateConfig} to change voice/model settings,
   * then call {@link send} to start the next session.
   *
   * The returned promise resolves once the server confirms with a
   * `session_closed` message, when the socket closes, or after a 15 s
   * **quiet** timeout — i.e. 15 s elapse without *any* server message
   * arriving. The timer resets on every incoming frame so a long final flush
   * that streams audio for tens of seconds is not truncated; only a genuinely
   * silent server trips the fuse. It resolves immediately when the socket is
   * not open, and rejects if the close request cannot be written.
   *
   * Calls made while a previous `endSession()` is still pending return that
   * same promise instead of sending a second close request.
   */
  endSession(): Promise<void> {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
    if (this.ending) return this.ending;

    const ws = this.ws;
    // Quiet timeout: resets on every incoming server message and trips only
    // when the server has been silent for this long. A fixed wall-clock
    // budget would truncate audio whenever the final flushed chunk takes
    // longer to generate than the budget.
    const QUIET_TIMEOUT_MS = 15_000;

    const pending = new Promise<void>((resolve, reject) => {
      let settled = false;
      let timer: ReturnType<typeof setTimeout> | undefined;

      const prevMessage = ws.onmessage;
      const prevClose = ws.onclose;

      // Put the connect()-installed handlers back so wrappers never stack
      // and the typed-error onclose stays in effect for the next session.
      const restoreHandlers = () => {
        clearTimeout(timer);
        ws.onmessage = prevMessage;
        ws.onclose = prevClose;
      };

      const done = () => {
        if (settled) return;
        settled = true;
        restoreHandlers();
        this.configSent = false;
        resolve();
      };

      const armQuietTimer = () => {
        clearTimeout(timer);
        timer = setTimeout(done, QUIET_TIMEOUT_MS);
      };

      armQuietTimer();

      ws.onmessage = (event: MessageEvent) => {
        // Reset the quiet timer on EVERY incoming frame — audio chunks for
        // the final flush count as liveness, not just session_closed.
        armQuietTimer();
        if (prevMessage) prevMessage.call(ws, event);
        try {
          if (JSON.parse(StreamingSession.decodeFrame(event.data)).session_closed) done();
        } catch { /* ignore parse errors */ }
      };

      ws.onclose = (event: CloseEvent) => {
        if (this.ws === ws) this.ws = null;
        if (prevClose) prevClose.call(ws, event);
        done();
      };

      try {
        ws.send(JSON.stringify({ close: true }));
      } catch (e) {
        // The close request never left: undo the wrappers and the timer
        // right away instead of leaving them armed for the full timeout.
        if (!settled) {
          settled = true;
          restoreHandlers();
        }
        reject(e);
      }
    });

    this.ending = pending;
    const clearEnding = () => {
      if (this.ending === pending) this.ending = null;
    };
    pending.then(clearEnding, clearEnding);
    return pending;
  }

  /**
   * Update session configuration for the next session.
   *
   * Call this after {@link endSession} and before the next {@link send}
   * to change voice, model, language, or other settings. The merged config
   * is attached to the next frame passed to {@link send}.
   */
  updateConfig(config: Partial<StreamConfig>): void {
    Object.assign(this.config, config);
    this.configSent = false;
  }

  /**
   * Close the session and the WebSocket connection.
   *
   * For session reuse without closing the connection, use
   * {@link endSession} instead.
   *
   * The returned promise resolves once the server confirms the close with a
   * `session_closed` message, or after a 15 s **quiet** timeout (no traffic
   * from the server in that window). Audio frames from the server-side
   * final-flush of the still-buffered text are delivered to your callbacks
   * before this promise resolves, and each frame resets the quiet timer.
   * The socket is closed even when ending the session fails; in that case
   * the failure is still propagated to the caller.
   */
  async close(): Promise<void> {
    try {
      await this.endSession();
    } finally {
      const ws = this.ws;
      if (ws) {
        try { ws.close(); } catch { /* already closed */ }
        this.ws = null;
      }
    }
  }

  /** Whether the underlying WebSocket is open. */
  get isConnected(): boolean {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
}
1576
+
860
1577
  /**
861
1578
  * KugelAudio API client.
862
1579
  *
@@ -870,13 +1587,13 @@ class MultiContextSession {
870
1587
  * // List voices
871
1588
  * const voices = await client.voices.list();
872
1589
  *
873
- * // Generate audio with fast model (1.5B params)
1590
+ * // Generate audio with fast model
874
1591
  * const audio = await client.tts.generate({
875
1592
  * text: 'Hello, world!',
876
1593
  * modelId: 'kugel-1-turbo',
877
1594
  * });
878
1595
  *
879
- * // Generate audio with premium model (7B params)
1596
+ * // Generate audio with premium model
880
1597
  * const audio = await client.tts.generate({
881
1598
  * text: 'Hello, world!',
882
1599
  * modelId: 'kugel-1',
@@ -891,6 +1608,7 @@ export class KugelAudio {
891
1608
  private _apiUrl: string;
892
1609
  private _ttsUrl: string;
893
1610
  private _timeout: number;
1611
+ private _keepalivePingInterval: number | null;
894
1612
 
895
1613
  /** Models resource */
896
1614
  public readonly models: ModelsResource;
@@ -901,17 +1619,40 @@ export class KugelAudio {
901
1619
 
902
1620
  constructor(options: KugelAudioOptions) {
903
1621
  if (!options.apiKey) {
904
- throw new Error('API key is required');
1622
+ throw new ValidationError(
1623
+ 'KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY ' +
1624
+ 'environment variable or pass { apiKey: ... } to the client. ' +
1625
+ 'Get a key at https://app.kugelaudio.com/settings/api-keys.',
1626
+ );
905
1627
  }
906
1628
 
907
- this._apiKey = options.apiKey;
1629
+ const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
1630
+ this._apiKey = cleanKey;
908
1631
  this._isMasterKey = options.isMasterKey || false;
909
1632
  this._isToken = options.isToken || false;
910
1633
  this._orgId = options.orgId;
911
- this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
1634
+
1635
+ if (options.apiUrl) {
1636
+ this._apiUrl = options.apiUrl.replace(/\/$/, '');
1637
+ } else {
1638
+ const effectiveRegion = options.region || detectedRegion;
1639
+ if (!effectiveRegion) {
1640
+ this._apiUrl = DEFAULT_API_URL;
1641
+ } else if (!SUPPORTED_REGIONS.includes(effectiveRegion as Region)) {
1642
+ throw new ValidationError(
1643
+ `Invalid region '${effectiveRegion}'. Must be one of: ${SUPPORTED_REGIONS.join(', ')}.`,
1644
+ );
1645
+ } else {
1646
+ this._apiUrl = effectiveRegion === 'eu' ? EU_API_URL : DEFAULT_API_URL;
1647
+ }
1648
+ }
1649
+
912
1650
  // If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
913
1651
  this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
914
1652
  this._timeout = options.timeout || 60000;
1653
+ this._keepalivePingInterval = options.keepalivePingInterval !== undefined
1654
+ ? options.keepalivePingInterval
1655
+ : 20000;
915
1656
 
916
1657
  this.models = new ModelsResource(this);
917
1658
  this.voices = new VoicesResource(this);
@@ -965,6 +1706,11 @@ export class KugelAudio {
965
1706
  return this._ttsUrl;
966
1707
  }
967
1708
 
1709
+ /** Get keepalive ping interval in milliseconds, or null if disabled. The constructor stores 20000 when the `keepalivePingInterval` option is left undefined. */
1710
+ get keepalivePingInterval(): number | null {
1711
+ return this._keepalivePingInterval;
1712
+ }
1713
+
968
1714
  /**
969
1715
  * Close the client and release resources.
970
1716
  * This closes any pooled WebSocket connections.
@@ -1027,25 +1773,57 @@ export class KugelAudio {
1027
1773
 
1028
1774
  clearTimeout(timeoutId);
1029
1775
 
1030
- if (response.status === 401) {
1031
- throw new AuthenticationError('Invalid API key');
1776
+ if (!response.ok) {
1777
+ const text = await response.text();
1778
+ throw classifyHttpError(response.status, text, response.headers);
1032
1779
  }
1033
- if (response.status === 403) {
1034
- throw new InsufficientCreditsError('Access denied');
1780
+
1781
+ return await response.json();
1782
+ } catch (error) {
1783
+ clearTimeout(timeoutId);
1784
+ if (error instanceof KugelAudioError) {
1785
+ throw error;
1035
1786
  }
1036
- if (response.status === 429) {
1037
- throw new RateLimitError('Rate limit exceeded');
1787
+ if ((error as Error).name === 'AbortError') {
1788
+ throw new ConnectionError(
1789
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
1790
+ );
1038
1791
  }
1792
+ throw new ConnectionError(
1793
+ `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
1794
+ 'Check network connectivity.',
1795
+ );
1796
+ }
1797
+ }
1798
+
1799
+ /**
1800
+ * Make a multipart/form-data request (for file uploads).
1801
+ * @internal Used by VoicesResource for reference file uploads.
1802
+ */
1803
+ async requestMultipart<T>(method: string, path: string, formData: FormData): Promise<T> {
1804
+ const url = `${this._apiUrl}${path}`;
1805
+
1806
+ const headers: Record<string, string> = {
1807
+ 'X-API-Key': this._apiKey,
1808
+ 'Authorization': `Bearer ${this._apiKey}`,
1809
+ };
1810
+
1811
+ const controller = new AbortController();
1812
+ const timeoutId = setTimeout(() => controller.abort(), this._timeout);
1813
+
1814
+ try {
1815
+ const response = await fetch(url, {
1816
+ method,
1817
+ headers,
1818
+ body: formData,
1819
+ signal: controller.signal,
1820
+ });
1821
+
1822
+ clearTimeout(timeoutId);
1823
+
1039
1824
  if (!response.ok) {
1040
1825
  const text = await response.text();
1041
- let message = `HTTP ${response.status}`;
1042
- try {
1043
- const json = JSON.parse(text);
1044
- message = json.detail || json.error || message;
1045
- } catch {
1046
- message = text || message;
1047
- }
1048
- throw new KugelAudioError(message, response.status);
1826
+ throw classifyHttpError(response.status, text, response.headers);
1049
1827
  }
1050
1828
 
1051
1829
  return await response.json();
@@ -1055,10 +1833,14 @@ export class KugelAudio {
1055
1833
  throw error;
1056
1834
  }
1057
1835
  if ((error as Error).name === 'AbortError') {
1058
- throw new KugelAudioError('Request timed out');
1836
+ throw new ConnectionError(
1837
+ `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
1838
+ );
1059
1839
  }
1060
- throw new KugelAudioError(`Request failed: ${(error as Error).message}`);
1840
+ throw new ConnectionError(
1841
+ `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
1842
+ 'Check network connectivity.',
1843
+ );
1061
1844
  }
1062
1845
  }
1063
1846
  }
1064
-