npm - kugelaudio - Versions diffs - 0.2.2 → 0.3.0 - Mend

kugelaudio 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/src/client.ts CHANGED Viewed

@@ -3,25 +3,52 @@
  */
 import {
-    AuthenticationError,
-    InsufficientCreditsError,
+    ConnectionError,
     KugelAudioError,
-    RateLimitError,
+    ValidationError,
+    classifyHttpError,
+    classifyWsClose,
+    classifyWsFrame,
+    classifyWsHandshakeError,
 } from './errors';
 import type {
     AudioChunk,
     AudioResponse,
+    CreateVoiceOptions,
     GenerateOptions,
     GenerationStats,
     KugelAudioOptions,
     Model,
     StreamCallbacks,
-    Voice
+    StreamConfig,
+    StreamingSessionCallbacks,
+    UpdateVoiceOptions,
+    VoiceDetail,
+    VoiceListResponse,
+    VoiceReference,
+    WordTimestamp
 } from './types';
 import { base64ToArrayBuffer } from './utils';
 import { getWebSocket } from './websocket';
-const DEFAULT_API_URL = 'https://api.kugelaudio.com';
+import type { Region } from './types';
+const REGION_URLS: Record<Region, string> = {
+  eu: 'https://api.kugelaudio.com',
+  us: 'https://us-api.kugelaudio.com',
+  global: 'https://global-api.kugelaudio.com',
+};
+const REGION_PREFIXES = ['eu-', 'us-', 'global-'] as const;
+function parseApiKey(apiKey: string): { cleanKey: string; detectedRegion?: Region } {
+  for (const prefix of REGION_PREFIXES) {
+    if (apiKey.startsWith(prefix)) {
+      return { cleanKey: apiKey.slice(prefix.length), detectedRegion: prefix.slice(0, -1) as Region };
+    }
+  }
+  return { cleanKey: apiKey };
+}
 /**
  * Create a new WebSocket instance.
@@ -36,6 +63,23 @@ function createWs(url: string): WebSocket {
 /** WebSocket OPEN readyState constant. */
 const WS_OPEN = 1;
+let _languageWarningLogged = false;
+function warnIfNoLanguage(
+  language: string | undefined,
+  normalize: boolean | undefined
+): void {
+  const normEnabled = normalize === undefined || normalize;
+  if (!language && normEnabled && !_languageWarningLogged) {
+    _languageWarningLogged = true;
+    console.warn(
+      "[KugelAudio] No 'language' set with normalization enabled — the server " +
+        'will auto-detect the language, adding ~60-150ms to TTFA. Set language ' +
+        "(e.g., language: 'en') for optimal latency."
+    );
+  }
+}
 /**
  * Models resource for listing TTS models.
  */
@@ -71,52 +115,211 @@ class VoicesResource {
     language?: string;
     includePublic?: boolean;
     limit?: number;
-  }): Promise<Voice[]> {
+    offset?: number;
+  }): Promise<VoiceListResponse> {
     const params = new URLSearchParams();
     if (options?.language) params.set('language', options.language);
     if (options?.includePublic !== undefined) {
       params.set('include_public', String(options.includePublic));
     }
     if (options?.limit) params.set('limit', String(options.limit));
+    if (options?.offset) params.set('offset', String(options.offset));
     const query = params.toString();
     const path = query ? `/v1/voices?${query}` : '/v1/voices';
-    const response = await this.client.request<{ voices: any[] }>('GET', path);
+    const response = await this.client.request<{ voices: any[]; total: number; limit: number; offset: number }>('GET', path);
-    return response.voices.map((v) => ({
-      id: v.id,
-      name: v.name,
-      description: v.description,
-      category: v.category,
-      sex: v.sex,
-      age: v.age,
-      supportedLanguages: v.supported_languages || [],
-      sampleText: v.sample_text,
-      avatarUrl: v.avatar_url,
-      sampleUrl: v.sample_url,
-      isPublic: v.is_public || false,
-      verified: v.verified || false,
-    }));
+    return {
+      voices: response.voices.map((v) => ({
+        id: v.id,
+        name: v.name,
+        description: v.description,
+        category: v.category,
+        sex: v.sex,
+        age: v.age,
+        supportedLanguages: v.supported_languages || [],
+        sampleText: v.sample_text,
+        avatarUrl: v.avatar_url,
+        sampleUrl: v.sample_url,
+        isPublic: v.is_public || false,
+        verified: v.verified || false,
+      })),
+      total: response.total,
+      limit: response.limit,
+      offset: response.offset,
+    };
   }
   /**
    * Get a specific voice by ID.
    */
-  async get(voiceId: number): Promise<Voice> {
+  async get(voiceId: number): Promise<VoiceDetail> {
     const v = await this.client.request<any>('GET', `/v1/voices/${voiceId}`);
+    return this.mapVoiceDetail(v);
+  }
+  /**
+   * Create a new voice.
+   */
+  async create(options: CreateVoiceOptions): Promise<VoiceDetail> {
+    const metadata = {
+      name: options.name,
+      sex: options.sex,
+      description: options.description ?? '',
+      category: options.category ?? 'conversational',
+      age: options.age ?? 'middle_age',
+      quality: options.quality ?? 'mid',
+      supported_languages: options.supportedLanguages ?? ['en'],
+      is_public: options.isPublic ?? false,
+      sample_text: options.sampleText ?? '',
+    };
+    const formData = new FormData();
+    formData.append(
+      'metadata',
+      new Blob([JSON.stringify(metadata)], { type: 'application/json' }),
+    );
+    if (options.referenceFiles) {
+      for (const file of options.referenceFiles) {
+        formData.append('files', file);
+      }
+    }
+    const v = await this.client.requestMultipart<any>('POST', '/v1/voices', formData);
+    return this.mapVoiceDetail(v);
+  }
+  /**
+   * Update an existing voice. Only provided fields are updated.
+   */
+  async update(voiceId: number, options: UpdateVoiceOptions): Promise<VoiceDetail> {
+    const payload: Record<string, unknown> = {};
+    if (options.name !== undefined) payload.name = options.name;
+    if (options.description !== undefined) payload.description = options.description;
+    if (options.category !== undefined) payload.category = options.category;
+    if (options.age !== undefined) payload.age = options.age;
+    if (options.sex !== undefined) payload.sex = options.sex;
+    if (options.quality !== undefined) payload.quality = options.quality;
+    if (options.supportedLanguages !== undefined) payload.supported_languages = options.supportedLanguages;
+    if (options.isPublic !== undefined) payload.is_public = options.isPublic;
+    if (options.sampleText !== undefined) payload.sample_text = options.sampleText;
+    const v = await this.client.request<any>('PATCH', `/v1/voices/${voiceId}`, payload);
+    return this.mapVoiceDetail(v);
+  }
+  /**
+   * Delete a voice.
+   */
+  async delete(voiceId: number): Promise<void> {
+    await this.client.request<any>('DELETE', `/v1/voices/${voiceId}`);
+  }
+  // -- Reference management --
+  /**
+   * List reference audio files for a voice.
+   */
+  async listReferences(voiceId: number): Promise<VoiceReference[]> {
+    const response = await this.client.request<{ references: any[] }>(
+      'GET',
+      `/v1/voices/${voiceId}/references`,
+    );
+    return response.references.map((r) => this.mapVoiceReference(r));
+  }
+  /**
+   * Upload a reference audio file to a voice.
+   *
+   * @param voiceId - Voice ID
+   * @param file - Audio file (File in browser, Blob in Node.js)
+   * @param referenceText - Optional transcript of the reference audio
+   */
+  async addReference(
+    voiceId: number,
+    file: File | Blob,
+    referenceText?: string,
+  ): Promise<VoiceReference> {
+    const formData = new FormData();
+    formData.append('file', file);
+    if (referenceText) {
+      formData.append('reference_text', referenceText);
+    }
+    const r = await this.client.requestMultipart<any>(
+      'POST',
+      `/v1/voices/${voiceId}/references`,
+      formData,
+    );
+    return this.mapVoiceReference(r);
+  }
+  /**
+   * Delete a reference audio file from a voice.
+   */
+  async deleteReference(voiceId: number, referenceId: number): Promise<void> {
+    await this.client.request<any>(
+      'DELETE',
+      `/v1/voices/${voiceId}/references/${referenceId}`,
+    );
+  }
+  // -- Publishing --
+  /**
+   * Request publication of a voice. Sets it as public and marks it
+   * as pending verification by an admin.
+   */
+  async publish(voiceId: number): Promise<VoiceDetail> {
+    const v = await this.client.request<any>('POST', `/v1/voices/${voiceId}/publish`);
+    return this.mapVoiceDetail(v);
+  }
+  // -- Sample generation --
+  /**
+   * Trigger sample audio generation for a voice.
+   */
+  async generateSample(voiceId: number): Promise<VoiceDetail> {
+    const v = await this.client.request<any>(
+      'POST',
+      `/v1/voices/${voiceId}/generate-sample`,
+    );
+    return this.mapVoiceDetail(v);
+  }
+  // -- Helpers --
+  private mapVoiceDetail(v: any): VoiceDetail {
     return {
       id: v.id,
       name: v.name,
-      description: v.description,
-      category: v.category,
-      sex: v.sex,
+      description: v.description ?? '',
+      generativeVoiceDescription: v.generative_voice_description ?? '',
+      supportedLanguages: v.supported_languages ?? [],
+      category: v.category ?? 'cloned',
       age: v.age,
-      supportedLanguages: v.supported_languages || [],
-      sampleText: v.sample_text,
-      avatarUrl: v.avatar_url,
+      sex: v.sex,
+      quality: v.quality ?? 'mid',
+      isPublic: v.is_public ?? false,
+      verified: v.verified ?? false,
+      pendingVerification: v.pending_verification ?? false,
       sampleUrl: v.sample_url,
-      isPublic: v.is_public || false,
-      verified: v.verified || false,
+      avatarUrl: v.avatar_url,
+      sampleText: v.sample_text ?? '',
+    };
+  }
+  private mapVoiceReference(r: any): VoiceReference {
+    return {
+      id: r.id,
+      voiceId: r.voice_id,
+      name: r.name ?? '',
+      referenceText: r.reference_text ?? '',
+      s3Path: r.s3_path ?? '',
+      audioUrl: r.audio_url,
+      isGenerated: r.is_generated ?? false,
     };
   }
 }
@@ -134,6 +337,7 @@ class TTSResource {
     reject: (error: Error) => void;
   }> = new Map();
   private requestCounter = 0;
+  private keepaliveTimer: ReturnType<typeof setInterval> | null = null;
   constructor(private client: KugelAudio) {}
@@ -172,11 +376,15 @@ class TTSResource {
   async generate(options: GenerateOptions): Promise<AudioResponse> {
     const chunks: ArrayBuffer[] = [];
     let finalStats: GenerationStats | undefined;
+    const allTimestamps: WordTimestamp[] = [];
     await this.stream(options, {
       onChunk: (chunk) => {
         chunks.push(base64ToArrayBuffer(chunk.audio));
       },
+      onWordTimestamps: (timestamps) => {
+        allTimestamps.push(...timestamps);
+      },
       onFinal: (stats) => {
         finalStats = stats;
       },
@@ -198,9 +406,71 @@ class TTSResource {
       durationMs: finalStats ? finalStats.durationMs : 0,
       generationMs: finalStats ? finalStats.generationMs : 0,
       rtf: finalStats ? finalStats.rtf : 0,
+      wordTimestamps: allTimestamps,
     };
   }
+  /**
+   * Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
+   *
+   * **Node.js only** — this method requires the `stream` built-in module and is
+   * intended for server-side integrations such as Vapi custom TTS endpoints,
+   * Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
+   *
+   * Compared to manually wiring `onChunk` to a `Readable`, this method avoids
+   * a common race-condition: the stream object is created and returned **before**
+   * any chunks arrive, so the caller can safely pipe or attach listeners before
+   * the first audio byte is pushed.
+   *
+   * @example Vapi custom TTS endpoint
+   * ```typescript
+   * app.post('/synthesize', (req, res) => {
+   *   res.setHeader('Content-Type', 'audio/pcm');
+   *   res.setHeader('Transfer-Encoding', 'chunked');
+   *
+   *   const readable = client.tts.toReadable({
+   *     text: req.body.message.text,
+   *     modelId: 'kugel-1-turbo',
+   *     sampleRate: req.body.message.sampleRate,
+   *     language: 'en',
+   *   });
+   *
+   *   readable.pipe(res);
+   * });
+   * ```
+   *
+   * @param options - TTS generation options (same as `stream()`)
+   * @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
+   * @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
+   */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  toReadable(options: GenerateOptions, reuseConnection = true): any {
+    // Dynamic require keeps browser bundles free of Node.js built-ins.
+    // eslint-disable-next-line @typescript-eslint/no-require-imports
+    const { Readable } = require('stream') as typeof import('stream');
+    const readable = new Readable({ read() {} });
+    this.stream(
+      options,
+      {
+        onChunk: (chunk: AudioChunk) => {
+          readable.push(Buffer.from(chunk.audio, 'base64'));
+        },
+        onFinal: () => {
+          readable.push(null);
+        },
+        onError: (error: Error) => {
+          readable.destroy(error);
+        },
+      },
+      reuseConnection
+    ).catch((error: Error) => {
+      readable.destroy(error);
+    });
+    return readable;
+  }
   /**
    * Build the WebSocket URL with appropriate auth param.
    */
@@ -259,11 +529,20 @@ class TTSResource {
         this.wsConnection = ws;
         this.wsUrl = url;
         this.setupMessageHandler(ws);
+        this.startKeepalive(ws);
         resolve(ws);
       };
-      ws.onerror = () => {
-        reject(new KugelAudioError('WebSocket connection error'));
+      ws.onerror = (event: unknown) => {
+        const underlying = (event as { error?: unknown } | null)?.error ?? event;
+        const typed = classifyWsHandshakeError(underlying);
+        reject(
+          typed ??
+            new ConnectionError(
+              `Could not establish KugelAudio WebSocket connection to ${url}. ` +
+                'Check network connectivity.',
+            ),
+        );
       };
     });
   }
@@ -287,7 +566,7 @@ class TTSResource {
         if (!pending) return;
         if (data.error) {
-          const error = this.parseError(data.error);
+          const error = this.parseError(data);
           pending.callbacks.onError?.(error);
           this.pendingRequests.delete(requestId);
           pending.reject(error);
@@ -301,7 +580,6 @@ class TTSResource {
             totalSamples: data.total_samples,
             durationMs: data.dur_ms,
             generationMs: data.gen_ms,
-            ttfaMs: data.ttfa_ms,
             rtf: data.rtf,
             error: data.error,
           };
@@ -321,23 +599,45 @@ class TTSResource {
           };
           pending.callbacks.onChunk?.(chunk);
         }
+        if (data.word_timestamps) {
+          const timestamps: WordTimestamp[] = data.word_timestamps.map(
+            (w: Record<string, unknown>) => ({
+              word: w.word as string,
+              startMs: w.start_ms as number,
+              endMs: w.end_ms as number,
+              charStart: w.char_start as number,
+              charEnd: w.char_end as number,
+              score: (w.score as number) ?? 1.0,
+            })
+          );
+          pending.callbacks.onWordTimestamps?.(timestamps);
+        }
       } catch (e) {
         console.error('Failed to parse WebSocket message:', e);
       }
     };
     ws.onclose = (event) => {
-      // Clear connection pool
+      // Clear connection pool and keepalive
+      this.stopKeepalive();
       this.wsConnection = null;
       this.wsUrl = null;
-      // Reject all pending requests
+      // Reject all pending requests with appropriate error types
       for (const [id, pending] of this.pendingRequests) {
         pending.callbacks.onClose?.();
-        if (event.code === 4001) {
-          pending.reject(new AuthenticationError('Authentication failed'));
-        } else if (event.code === 4003) {
-          pending.reject(new InsufficientCreditsError('Insufficient credits'));
+        // Only surface server-initiated error close codes; normal closes
+        // (1000, 1001) should not reject pending requests with an error.
+        if (
+          event.code === 4001 ||
+          event.code === 4003 ||
+          event.code === 4029 ||
+          event.code === 4500
+        ) {
+          const error = classifyWsClose(event.code, event.reason);
+          pending.callbacks.onError?.(error);
+          pending.reject(error);
         }
         this.pendingRequests.delete(id);
       }
@@ -345,7 +645,9 @@ class TTSResource {
     ws.onerror = () => {
       // Reject all pending requests
-      const error = new KugelAudioError('WebSocket connection error');
+      const error = new ConnectionError(
+        'KugelAudio WebSocket connection error. Check network connectivity.',
+      );
       for (const [id, pending] of this.pendingRequests) {
         pending.callbacks.onError?.(error);
         pending.reject(error);
@@ -380,6 +682,7 @@ class TTSResource {
     options: GenerateOptions,
     callbacks: StreamCallbacks
   ): Promise<void> {
+    warnIfNoLanguage(options.language, options.normalize);
     const ws = await this.getConnection();
     const requestId = ++this.requestCounter;
@@ -393,10 +696,14 @@ class TTSResource {
         model_id: options.modelId || 'kugel-1-turbo',
         voice_id: options.voiceId,
         cfg_scale: options.cfgScale ?? 2.0,
+        ...(options.temperature !== undefined && { temperature: options.temperature }),
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24000,
         normalize: options.normalize ?? true,
         ...(options.language && { language: options.language }),
+        ...(options.wordTimestamps && { word_timestamps: true }),
+        ...(options.speed !== undefined && { speed: options.speed }),
+        ...(options.projectId !== undefined && { project_id: options.projectId }),
       }));
     });
   }
@@ -408,6 +715,7 @@ class TTSResource {
     options: GenerateOptions,
     callbacks: StreamCallbacks
   ): Promise<void> {
+    warnIfNoLanguage(options.language, options.normalize);
     return new Promise((resolve, reject) => {
       const url = this.buildWsUrl();
       const ws = createWs(url);
@@ -424,6 +732,9 @@ class TTSResource {
           sample_rate: options.sampleRate ?? 24000,
           normalize: options.normalize ?? true,
           ...(options.language && { language: options.language }),
+          ...(options.wordTimestamps && { word_timestamps: true }),
+          ...(options.speed !== undefined && { speed: options.speed }),
+          ...(options.projectId !== undefined && { project_id: options.projectId }),
         }));
       };
@@ -438,7 +749,7 @@ class TTSResource {
           const data = JSON.parse(messageData);
           if (data.error) {
-            const error = this.parseError(data.error);
+            const error = this.parseError(data);
             callbacks.onError?.(error);
             ws.close();
             reject(error);
@@ -452,7 +763,6 @@ class TTSResource {
               totalSamples: data.total_samples,
               durationMs: data.dur_ms,
               generationMs: data.gen_ms,
-              ttfaMs: data.ttfa_ms,
               rtf: data.rtf,
               error: data.error,
             };
@@ -472,32 +782,87 @@ class TTSResource {
             };
             callbacks.onChunk?.(chunk);
           }
+          if (data.word_timestamps) {
+            const timestamps: WordTimestamp[] = data.word_timestamps.map(
+              (w: Record<string, unknown>) => ({
+                word: w.word as string,
+                startMs: w.start_ms as number,
+                endMs: w.end_ms as number,
+                charStart: w.char_start as number,
+                charEnd: w.char_end as number,
+                score: (w.score as number) ?? 1.0,
+              })
+            );
+            callbacks.onWordTimestamps?.(timestamps);
+          }
         } catch (e) {
           console.error('Failed to parse WebSocket message:', e);
         }
       };
-      ws.onerror = () => {
-        const error = new KugelAudioError('WebSocket connection error');
+      ws.onerror = (event: unknown) => {
+        const underlying = (event as { error?: unknown } | null)?.error ?? event;
+        const error =
+          classifyWsHandshakeError(underlying) ??
+          new ConnectionError(
+            'KugelAudio WebSocket connection error. Check network connectivity.',
+          );
         callbacks.onError?.(error);
         reject(error);
       };
       ws.onclose = (event) => {
         callbacks.onClose?.();
-        if (event.code === 4001) {
-          reject(new AuthenticationError('Authentication failed'));
-        } else if (event.code === 4003) {
-          reject(new InsufficientCreditsError('Insufficient credits'));
+        if (
+          event.code === 4001 ||
+          event.code === 4003 ||
+          event.code === 4029 ||
+          event.code === 4500
+        ) {
+          const error = classifyWsClose(event.code, event.reason);
+          callbacks.onError?.(error);
+          reject(error);
         }
       };
     });
   }
+  /**
+   * Start periodic keepalive pings on the pooled connection.
+   * Uses the ws package's ping() in Node.js; silently skips in browsers
+   * where WebSocket doesn't expose a ping method.
+   */
+  private startKeepalive(ws: WebSocket): void {
+    this.stopKeepalive();
+    const intervalMs = this.client.keepalivePingInterval;
+    if (intervalMs == null || intervalMs <= 0) return;
+    this.keepaliveTimer = setInterval(() => {
+      if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
+        this.stopKeepalive();
+        return;
+      }
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      if (typeof (ws as any).ping === 'function') {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (ws as any).ping();
+      }
+    }, intervalMs);
+  }
+  private stopKeepalive(): void {
+    if (this.keepaliveTimer !== null) {
+      clearInterval(this.keepaliveTimer);
+      this.keepaliveTimer = null;
+    }
+  }
   /**
    * Close the pooled WebSocket connection.
    */
   close(): void {
+    this.stopKeepalive();
     if (this.wsConnection) {
       try {
         this.wsConnection.close();
@@ -509,15 +874,43 @@ class TTSResource {
     }
   }
-  private parseError(message: string): Error {
-    const lower = message.toLowerCase();
-    if (lower.includes('auth') || lower.includes('unauthorized')) {
-      return new AuthenticationError(message);
-    }
-    if (lower.includes('credit')) {
-      return new InsufficientCreditsError(message);
-    }
-    return new KugelAudioError(message);
+  private parseError(data: { error?: string; error_code?: string; retry_after?: number }): Error {
+    return classifyWsFrame(data);
+  }
+  /**
+   * Create a streaming session for LLM integration.
+   *
+   * The session connects to `/ws/tts/stream` and keeps a persistent
+   * connection across multiple {@link StreamingSession.send} calls.
+   * The server auto-chunks text at sentence boundaries — no client-side
+   * flushing required.
+   *
+   * @param config - Session configuration (voice, model, chunking strategy).
+   * @param callbacks - Callbacks for audio chunks and session lifecycle events.
+   * @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
+   *
+   * @example
+   * ```typescript
+   * const session = client.tts.streamingSession(
+   *   { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
+   *   { onChunk: (chunk) => playAudio(chunk.audio) },
+   * );
+   *
+   * session.connect();
+   *
+   * for await (const token of llmStream) {
+   *   session.send(token);
+   * }
+   *
+   * await session.close();
+   * ```
+   */
+  streamingSession(
+    config: StreamConfig,
+    callbacks: StreamingSessionCallbacks
+  ): StreamingSession {
+    return new StreamingSession(this.client, config, callbacks);
   }
   /**
@@ -538,7 +931,7 @@ class TTSResource {
    *     console.log(`Audio from ${chunk.contextId}`);
    *     playAudio(chunk.audio);
    *   },
-   *   onContextFinal: (contextId) => {
+   *   onContextClosed: (contextId) => {
    *     console.log(`${contextId} finished`);
    *   },
    * });
@@ -589,8 +982,13 @@ class MultiContextSession {
   /**
    * Connect to the multi-context WebSocket endpoint.
+   *
+   * The returned promise resolves once the WebSocket is OPEN so callers can
+   * ``await session.connect(callbacks)`` before invoking
+   * {@link createContext} / {@link send}. Pre-open errors reject with the
+   * typed error.
    */
-  connect(callbacks: import('./types').MultiContextCallbacks): void {
+  connect(callbacks: import('./types').MultiContextCallbacks): Promise<void> {
     this.callbacks = callbacks;
     const wsUrl = this.client.ttsUrl
@@ -608,12 +1006,9 @@ class MultiContextSession {
     const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
     this.ws = createWs(url);
+    const ws = this.ws;
-    this.ws.onopen = () => {
-      // Connection established, ready to create contexts
-    };
-    this.ws.onmessage = (event: { data: unknown }) => {
+    ws.onmessage = (event: { data: unknown }) => {
       try {
         // Handle both browser (string) and Node.js (Buffer) message formats
         const messageData = typeof event.data === 'string'
@@ -654,10 +1049,6 @@ class MultiContextSession {
           this.callbacks.onChunk?.(chunk);
         }
-        if (data.is_final) {
-          this.callbacks.onContextFinal?.(data.context_id);
-        }
         if (data.context_closed) {
           this.contexts.delete(data.context_id);
           this.callbacks.onContextClosed?.(data.context_id);
@@ -676,20 +1067,51 @@ class MultiContextSession {
       }
     };
-    this.ws.onerror = () => {
-      this.callbacks.onError?.(new KugelAudioError('WebSocket connection error'));
-    };
+    return new Promise<void>((resolve, reject) => {
+      let opened = false;
-    this.ws.onclose = (event) => {
-      if (event.code === 4001) {
-        this.callbacks.onError?.(new AuthenticationError('Authentication failed'));
-      } else if (event.code === 4003) {
-        this.callbacks.onError?.(new InsufficientCreditsError('Insufficient credits'));
-      }
-      this.ws = null;
-      this.isStarted = false;
-      this.contexts.clear();
-    };
+      ws.onopen = () => {
+        opened = true;
+        resolve();
+      };
+      ws.onerror = (event: unknown) => {
+        const underlying = (event as { error?: unknown } | null)?.error ?? event;
+        const err =
+          classifyWsHandshakeError(underlying) ??
+          new ConnectionError(
+            'KugelAudio multi-context WebSocket connection error. ' +
+              'Check network connectivity.',
+          );
+        if (!opened) reject(err);
+        this.callbacks.onError?.(err);
+      };
+      ws.onclose = (event) => {
+        let typedErr: KugelAudioError | null = null;
+        if (
+          event.code === 4001 ||
+          event.code === 4003 ||
+          event.code === 4029 ||
+          event.code === 4500
+        ) {
+          typedErr = classifyWsClose(event.code, event.reason);
+          this.callbacks.onError?.(typedErr);
+        }
+        if (!opened) {
+          reject(
+            typedErr ??
+              new ConnectionError(
+                `KugelAudio multi-context WebSocket closed before ready ` +
+                  `(code ${event.code}).`,
+              ),
+          );
+        }
+        this.ws = null;
+        this.isStarted = false;
+        this.contexts.clear();
+      };
+    });
   }
   /**
@@ -713,10 +1135,13 @@ class MultiContextSession {
     // Include session config on first context
     if (!this.isStarted) {
+      warnIfNoLanguage(this.config.language, this.config.normalize);
       if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
       if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
+      if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
       if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
       if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
+      if (this.config.language) msg.language = this.config.language;
       if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
     }
@@ -821,6 +1246,336 @@ class MultiContextSession {
   }
 }
+/**
+ * Streaming session for LLM integration via `/ws/tts/stream`.
+ *
+ * The server accumulates text across multiple {@link send} calls and
+ * auto-chunks it at sentence boundaries, keeping the KV cache warm between
+ * chunks for natural prosody.  You never need to call `flush` explicitly —
+ * configure {@link StreamConfig.chunkLengthSchedule} or
+ * {@link StreamConfig.autoMode} instead.
+ *
+ * The session keeps its own shallow copy of the config passed to the
+ * constructor, so {@link updateConfig} never mutates the caller's object.
+ *
+ * @example
+ * ```typescript
+ * const session = client.tts.streamingSession({
+ *   voiceId: 123,
+ *   autoMode: true,
+ *   chunkLengthSchedule: [50, 100, 150, 250],
+ * }, {
+ *   onChunk: (chunk) => playAudio(chunk.audio),
+ *   onSessionClosed: (totalSecs) => console.log(`Done: ${totalSecs}s`),
+ * });
+ *
+ * // Wait for the socket to open: send() throws while it is still connecting.
+ * await session.connect();
+ *
+ * for await (const token of llmStream) {
+ *   session.send(token);
+ * }
+ *
+ * await session.close();
+ * ```
+ */
+class StreamingSession {
+  private ws: WebSocket | null = null;
+  private config: StreamConfig;
+  private callbacks: StreamingSessionCallbacks;
+  private client: KugelAudio;
+  /** True once the session config has been attached to an outgoing message. */
+  private configSent = false;
+  /** In-flight {@link endSession} promise, shared by overlapping callers. */
+  private pendingEnd: Promise<void> | null = null;
+  constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks) {
+    this.client = client;
+    // Shallow-copy so updateConfig() (Object.assign) cannot mutate the
+    // object the caller handed in.
+    this.config = { ...config };
+    this.callbacks = callbacks;
+  }
+  /** Convert a raw WebSocket frame payload into the text it carries. */
+  private static frameToText(data: unknown): string {
+    if (typeof data === 'string') return data;
+    // `Buffer` is a Node.js global. Guard the reference so that browsers
+    // fall through to String() instead of throwing a ReferenceError.
+    if (typeof Buffer !== 'undefined' && data instanceof Buffer) return data.toString();
+    return String(data);
+  }
+  /**
+   * Open the WebSocket connection and authenticate.
+   *
+   * The returned promise resolves once the WebSocket is OPEN, so callers can
+   * ``await session.connect()`` and then ``send()`` without racing the
+   * handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
+   * the promise with the typed error.
+   */
+  connect(): Promise<void> {
+    const wsUrl = this.client.ttsUrl
+      .replace('https://', 'wss://')
+      .replace('http://', 'ws://');
+    let authParam: string;
+    if (this.client.isToken) {
+      authParam = 'token';
+    } else if (this.client.isMasterKey) {
+      authParam = 'master_key';
+    } else {
+      authParam = 'api_key';
+    }
+    // Percent-encode the credential so reserved characters (`+`, `/`, `=`,
+    // `&`, …) survive the query string intact.
+    const url = `${wsUrl}/ws/tts/stream?${authParam}=${encodeURIComponent(this.client.apiKey)}`;
+    this.ws = createWs(url);
+    const ws = this.ws;
+    ws.onmessage = (event: { data: unknown }) => {
+      try {
+        const data = JSON.parse(StreamingSession.frameToText(event.data));
+        if (data.error) {
+          this.callbacks.onError?.(new KugelAudioError(data.error));
+          return;
+        }
+        if (data.audio) {
+          const chunk: AudioChunk = {
+            audio: data.audio,
+            encoding: data.enc || 'pcm_s16le',
+            index: data.idx,
+            sampleRate: data.sr,
+            samples: data.samples,
+          };
+          this.callbacks.onChunk?.(chunk);
+        }
+        if (data.word_timestamps) {
+          const timestamps = data.word_timestamps.map((w: Record<string, unknown>) => ({
+            word: w.word as string,
+            startMs: w.start_ms as number,
+            endMs: w.end_ms as number,
+            charStart: w.char_start as number,
+            charEnd: w.char_end as number,
+            score: (w.score as number) ?? 1.0,
+          }));
+          this.callbacks.onWordTimestamps?.(timestamps);
+        }
+        if (data.chunk_complete) {
+          this.callbacks.onChunkComplete?.(
+            data.chunk_id ?? 0,
+            data.audio_seconds ?? 0,
+            data.gen_ms ?? 0,
+          );
+        }
+        if (data.generation_started) {
+          this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
+        }
+        if (data.session_closed) {
+          this.callbacks.onSessionClosed?.(
+            data.total_audio_seconds ?? 0,
+            data.total_text_chunks ?? 0,
+            data.total_audio_chunks ?? 0,
+          );
+        }
+      } catch (e) {
+        // Also reached when a user callback throws, not only on bad JSON.
+        console.error('[KugelAudio] Failed to parse streaming session message:', e);
+      }
+    };
+    return new Promise<void>((resolve, reject) => {
+      let opened = false;
+      ws.onopen = () => {
+        opened = true;
+        resolve();
+      };
+      ws.onerror = (event: unknown) => {
+        const underlying = (event as { error?: unknown } | null)?.error ?? event;
+        const err =
+          classifyWsHandshakeError(underlying) ??
+          new ConnectionError(
+            'KugelAudio streaming WebSocket connection error. ' +
+              'Check network connectivity.',
+          );
+        if (!opened) reject(err);
+        this.callbacks.onError?.(err);
+      };
+      ws.onclose = (event) => {
+        let typedErr: KugelAudioError | null = null;
+        if (
+          event.code === 4001 ||
+          event.code === 4003 ||
+          event.code === 4029 ||
+          event.code === 4500
+        ) {
+          typedErr = classifyWsClose(event.code, event.reason);
+          this.callbacks.onError?.(typedErr);
+        }
+        if (!opened) {
+          reject(
+            typedErr ??
+              new ConnectionError(
+                `KugelAudio streaming WebSocket closed before ready ` +
+                  `(code ${event.code}).`,
+              ),
+          );
+        }
+        // A close event can arrive late, after close() + connect() has
+        // already installed a newer socket. Only reset session state when
+        // the closing socket is still the current one (or none is set), so
+        // a stale event cannot tear down the new connection.
+        if (this.ws === null || this.ws === ws) {
+          this.ws = null;
+          this.configSent = false;
+        }
+      };
+    });
+  }
+  /**
+   * Send a text chunk to the server (e.g. one LLM output token).
+   *
+   * The server buffers text across multiple calls and starts generating at
+   * natural sentence boundaries automatically — no need to call `flush`.
+   * The first message of each session also carries the session config.
+   *
+   * @param text - Raw text or LLM token to append to the server buffer.
+   * @param flush - Force immediate generation of whatever is buffered.
+   *   **Avoid calling this per-sentence from the client.** Doing so bypasses
+   *   the server's semantic chunking, incurs a fresh model prefill cost on
+   *   every flush, and makes latency *worse*, not better.  Let the server
+   *   handle chunking via `chunkLengthSchedule` / `autoMode` instead.
+   * @throws KugelAudioError if the WebSocket is not open.
+   */
+  send(text: string, flush = false): void {
+    if (!this.ws || this.ws.readyState !== WS_OPEN) {
+      throw new KugelAudioError('StreamingSession not connected. Call connect() first.');
+    }
+    const msg: Record<string, unknown> = { text, flush };
+    if (!this.configSent) {
+      if (this.config.voiceId !== undefined) msg.voice_id = this.config.voiceId;
+      if (this.config.modelId !== undefined) msg.model_id = this.config.modelId;
+      if (this.config.cfgScale !== undefined) msg.cfg_scale = this.config.cfgScale;
+      if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
+      if (this.config.maxNewTokens !== undefined) msg.max_new_tokens = this.config.maxNewTokens;
+      if (this.config.sampleRate !== undefined) msg.sample_rate = this.config.sampleRate;
+      if (this.config.flushTimeoutMs !== undefined) msg.flush_timeout_ms = this.config.flushTimeoutMs;
+      if (this.config.maxBufferLength !== undefined) msg.max_buffer_length = this.config.maxBufferLength;
+      if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
+      if (this.config.language !== undefined) msg.language = this.config.language;
+      if (this.config.wordTimestamps) msg.word_timestamps = true;
+      if (this.config.autoMode !== undefined) msg.auto_mode = this.config.autoMode;
+      if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
+      if (this.config.speed !== undefined) msg.speed = this.config.speed;
+      this.configSent = true;
+    }
+    this.ws.send(JSON.stringify(msg));
+  }
+  /**
+   * End the current session but keep the WebSocket connection open.
+   *
+   * This allows starting a new session on the same connection, avoiding
+   * the overhead of a new WebSocket handshake (~200-300ms). After calling
+   * this, optionally call {@link updateConfig} to change voice/model settings,
+   * then call {@link send} to start the next session.
+   *
+   * The returned promise resolves once the server confirms with a
+   * `session_closed` message, or after a 15 s **quiet** timeout — i.e. 15 s
+   * elapse without *any* server message arriving. The timer resets on every
+   * incoming frame so a long final flush that streams audio for tens of
+   * seconds is not truncated; only a genuinely silent server trips the fuse.
+   *
+   * Calls made while a previous `endSession()` is still pending return that
+   * same promise instead of sending a second close request.
+   */
+  endSession(): Promise<void> {
+    if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
+    // Share the in-flight request: a second wrapper layer would be restored
+    // in the wrong order and leave a stale handler installed on the socket.
+    if (this.pendingEnd) return this.pendingEnd;
+    const ws = this.ws;
+    // Quiet timeout: resets on every incoming server message and trips only
+    // when the server has been silent for this long. A fixed wall-clock
+    // budget would truncate audio whenever the final flushed chunk takes
+    // longer to generate than the budget.
+    const QUIET_TIMEOUT_MS = 15_000;
+    const pending = new Promise<void>((resolve) => {
+      let settled = false;
+      let timer: ReturnType<typeof setTimeout> | undefined;
+      const prevMessage = ws.onmessage;
+      const prevClose = ws.onclose;
+      const done = () => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(timer);
+        // Restore the original handlers so subsequent endSession() calls
+        // don't stack wrappers and so the typed-error onclose installed
+        // by connect() remains in effect for the next session.
+        ws.onmessage = prevMessage;
+        ws.onclose = prevClose;
+        this.configSent = false;
+        resolve();
+      };
+      const armQuietTimer = () => {
+        clearTimeout(timer);
+        timer = setTimeout(done, QUIET_TIMEOUT_MS);
+      };
+      armQuietTimer();
+      ws.onmessage = (event: MessageEvent) => {
+        // Reset the quiet timer on EVERY incoming frame — audio chunks for
+        // the final flush count as liveness, not just session_closed.
+        armQuietTimer();
+        if (prevMessage) prevMessage.call(ws, event);
+        try {
+          if (JSON.parse(StreamingSession.frameToText(event.data)).session_closed) done();
+        } catch { /* ignore parse errors */ }
+      };
+      ws.onclose = (event: CloseEvent) => {
+        // Do not clobber a newer socket installed by a later connect().
+        if (this.ws === ws) this.ws = null;
+        if (prevClose) prevClose.call(ws, event);
+        done();
+      };
+      ws.send(JSON.stringify({ close: true }));
+    });
+    this.pendingEnd = pending;
+    // Forget the shared promise once it settles, whether it resolved or
+    // rejected (e.g. ws.send threw), so the next session can end normally.
+    const forget = () => {
+      if (this.pendingEnd === pending) this.pendingEnd = null;
+    };
+    void pending.then(forget, forget);
+    return pending;
+  }
+  /**
+   * Update session configuration for the next session.
+   *
+   * Call this after {@link endSession} and before the next {@link send}
+   * to change voice, model, language, or other settings. The values are
+   * merged into the session's own copy of the config and re-sent with the
+   * next {@link send}.
+   */
+  updateConfig(config: Partial<StreamConfig>): void {
+    Object.assign(this.config, config);
+    this.configSent = false;
+  }
+  /**
+   * Close the session and the WebSocket connection.
+   *
+   * For session reuse without closing the connection, use
+   * {@link endSession} instead.
+   *
+   * The returned promise resolves once the server confirms the close with a
+   * `session_closed` message, or after a 15 s **quiet** timeout (no traffic
+   * from the server in that window). Audio frames from the server-side
+   * final-flush of the still-buffered text are delivered to your callbacks
+   * before this promise resolves, and each frame resets the quiet timer.
+   */
+  async close(): Promise<void> {
+    await this.endSession();
+    if (this.ws) {
+      try { this.ws.close(); } catch { /* already closed */ }
+      this.ws = null;
+    }
+  }
+  /** Whether the underlying WebSocket is open. */
+  get isConnected(): boolean {
+    return this.ws !== null && this.ws.readyState === WS_OPEN;
+  }
+}
 /**
  * KugelAudio API client.
  *
@@ -834,13 +1589,13 @@ class MultiContextSession {
  * // List voices
  * const voices = await client.voices.list();
  *
- * // Generate audio with fast model (1.5B params)
+ * // Generate audio with fast model
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
  *   modelId: 'kugel-1-turbo',
  * });
  *
- * // Generate audio with premium model (7B params)
+ * // Generate audio with premium model
  * const audio = await client.tts.generate({
  *   text: 'Hello, world!',
  *   modelId: 'kugel-1',
@@ -855,6 +1610,7 @@ export class KugelAudio {
   private _apiUrl: string;
   private _ttsUrl: string;
   private _timeout: number;
+  private _keepalivePingInterval: number | null;
   /** Models resource */
   public readonly models: ModelsResource;
@@ -865,17 +1621,37 @@ export class KugelAudio {
   constructor(options: KugelAudioOptions) {
     if (!options.apiKey) {
-      throw new Error('API key is required');
+      throw new ValidationError(
+        'KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY ' +
+          'environment variable or pass { apiKey: ... } to the client. ' +
+          'Get a key at https://app.kugelaudio.com/settings/api-keys.',
+      );
     }
-    this._apiKey = options.apiKey;
+    const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
+    this._apiKey = cleanKey;
     this._isMasterKey = options.isMasterKey || false;
     this._isToken = options.isToken || false;
     this._orgId = options.orgId;
-    this._apiUrl = (options.apiUrl || DEFAULT_API_URL).replace(/\/$/, '');
+    if (options.apiUrl) {
+      this._apiUrl = options.apiUrl.replace(/\/$/, '');
+    } else {
+      const effectiveRegion = options.region || detectedRegion || 'eu';
+      if (!(effectiveRegion in REGION_URLS)) {
+        throw new ValidationError(
+          `Invalid region '${effectiveRegion}'. Must be one of: ${Object.keys(REGION_URLS).join(', ')}.`,
+        );
+      }
+      this._apiUrl = REGION_URLS[effectiveRegion as Region];
+    }
     // If ttsUrl not specified, use apiUrl (backend proxies to TTS server)
     this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, '');
     this._timeout = options.timeout || 60000;
+    this._keepalivePingInterval = options.keepalivePingInterval !== undefined
+      ? options.keepalivePingInterval
+      : 20000;
     this.models = new ModelsResource(this);
     this.voices = new VoicesResource(this);
@@ -929,6 +1705,11 @@ export class KugelAudio {
     return this._ttsUrl;
   }
+  /** Get keepalive ping interval in milliseconds, or null if disabled (the constructor stores 20000 when `options.keepalivePingInterval` is undefined). */
+  get keepalivePingInterval(): number | null {
+    return this._keepalivePingInterval;
+  }
   /**
    * Close the client and release resources.
    * This closes any pooled WebSocket connections.
@@ -991,25 +1772,57 @@ export class KugelAudio {
       clearTimeout(timeoutId);
-      if (response.status === 401) {
-        throw new AuthenticationError('Invalid API key');
+      if (!response.ok) {
+        const text = await response.text();
+        throw classifyHttpError(response.status, text, response.headers);
       }
-      if (response.status === 403) {
-        throw new InsufficientCreditsError('Access denied');
+      return await response.json();
+    } catch (error) {
+      clearTimeout(timeoutId);
+      if (error instanceof KugelAudioError) {
+        throw error;
       }
-      if (response.status === 429) {
-        throw new RateLimitError('Rate limit exceeded');
+      if ((error as Error).name === 'AbortError') {
+        throw new ConnectionError(
+          `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
+        );
       }
+      throw new ConnectionError(
+        `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
+          'Check network connectivity.',
+      );
+    }
+  }
+  /**
+   * Make a multipart/form-data request (for file uploads).
+   * @internal Used by VoicesResource for reference file uploads.
+   */
+  async requestMultipart<T>(method: string, path: string, formData: FormData): Promise<T> {
+    const url = `${this._apiUrl}${path}`;
+    const headers: Record<string, string> = {
+      'X-API-Key': this._apiKey,
+      'Authorization': `Bearer ${this._apiKey}`,
+    };
+    const controller = new AbortController();
+    const timeoutId = setTimeout(() => controller.abort(), this._timeout);
+    try {
+      const response = await fetch(url, {
+        method,
+        headers,
+        body: formData,
+        signal: controller.signal,
+      });
+      clearTimeout(timeoutId);
       if (!response.ok) {
         const text = await response.text();
-        let message = `HTTP ${response.status}`;
-        try {
-          const json = JSON.parse(text);
-          message = json.detail || json.error || message;
-        } catch {
-          message = text || message;
-        }
-        throw new KugelAudioError(message, response.status);
+        throw classifyHttpError(response.status, text, response.headers);
       }
       return await response.json();
@@ -1019,9 +1832,14 @@ export class KugelAudio {
         throw error;
       }
       if ((error as Error).name === 'AbortError') {
-        throw new KugelAudioError('Request timed out');
+        throw new ConnectionError(
+          `Request to ${method} ${path} timed out after ${this._timeout}ms.`,
+        );
       }
-      throw new KugelAudioError(`Request failed: ${(error as Error).message}`);
+      throw new ConnectionError(
+        `Could not reach KugelAudio at ${url}: ${(error as Error).message}. ` +
+          'Check network connectivity.',
+      );
     }
   }
 }