npm - kugelaudio - Versions diffs - 0.6.1 → 0.8.0 - Mend

kugelaudio 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/src/client.ts CHANGED Viewed

@@ -29,14 +29,18 @@ import type {
     VoiceReference,
     WordTimestamp
 } from './types';
+import { parseSessionUsage } from './types';
 import { base64ToArrayBuffer } from './utils';
 import { getWebSocket } from './websocket';
 import type { Region } from './types';
+import packageJson from '../package.json';
 const DEFAULT_API_URL = 'https://api.kugelaudio.com';
 const EU_API_URL = 'https://api.eu.kugelaudio.com';
 const SUPPORTED_REGIONS = ['eu', 'us', 'global'] as const;
+const SDK_NAME = 'js';
+const SDK_VERSION = packageJson.version;
 const REGION_PREFIXES = ['eu-', 'us-', 'global-'] as const;
@@ -49,6 +53,18 @@ function parseApiKey(apiKey: string): { cleanKey: string; detectedRegion?: Regio
   return { cleanKey: apiKey };
 }
+function sdkHeaders(): Record<string, string> { // Builds the SDK-identification headers; returns a fresh object on every call.
+  return {
+    'X-KugelAudio-SDK': SDK_NAME, // SDK identifier constant ('js')
+    'X-KugelAudio-SDK-Version': SDK_VERSION, // version string read from package.json
+  };
+}
+function appendSdkQuery(url: string): string { // Returns `url` with `sdk` and `sdk_version` query parameters appended.
+  const separator = url.includes('?') ? '&' : '?'; // extend an existing query string, otherwise start one
+  return `${url}${separator}sdk=${encodeURIComponent(SDK_NAME)}&sdk_version=${encodeURIComponent(SDK_VERSION)}`; // both values are URL-encoded
+}
 /**
  * Create a new WebSocket instance.
  * Lazily resolves the constructor to avoid top-level side-effects
@@ -491,7 +507,7 @@ class TTSResource {
     if (this.client.orgId !== undefined) {
       url += `&org_id=${this.client.orgId}`;
     }
-    return url;
+    return appendSdkQuery(url);
   }
   /**
@@ -581,6 +597,7 @@ class TTSResource {
             generationMs: data.gen_ms,
             rtf: data.rtf,
             error: data.error,
+            usage: parseSessionUsage(data) ?? undefined,
           };
           pending.callbacks.onFinal?.(stats);
           this.pendingRequests.delete(requestId);
@@ -692,17 +709,21 @@ class TTSResource {
       ws.send(JSON.stringify({
         text: options.text,
-        model_id: options.modelId || 'kugel-1-turbo',
+        model_id: options.modelId || 'kugel-3',
         voice_id: options.voiceId,
         cfg_scale: options.cfgScale ?? 2.0,
         ...(options.temperature !== undefined && { temperature: options.temperature }),
         max_new_tokens: options.maxNewTokens ?? 2048,
         sample_rate: options.sampleRate ?? 24000,
+        ...(options.outputFormat && { output_format: options.outputFormat }),
         normalize: options.normalize ?? true,
         ...(options.language && { language: options.language }),
         ...(options.wordTimestamps && { word_timestamps: true }),
         ...(options.speed !== undefined && { speed: options.speed }),
         ...(options.projectId !== undefined && { project_id: options.projectId }),
+        // [] is meaningful (explicit opt-out) and must be sent; only
+        // undefined (use the project default) is omitted.
+        ...(options.dictionaryIds !== undefined && { dictionary_ids: options.dictionaryIds }),
       }));
     });
   }
@@ -724,16 +745,20 @@ class TTSResource {
         // Send TTS request
         ws.send(JSON.stringify({
           text: options.text,
-          model_id: options.modelId || 'kugel-1-turbo',
+          model_id: options.modelId || 'kugel-3',
           voice_id: options.voiceId,
           cfg_scale: options.cfgScale ?? 2.0,
           max_new_tokens: options.maxNewTokens ?? 2048,
           sample_rate: options.sampleRate ?? 24000,
+          ...(options.outputFormat && { output_format: options.outputFormat }),
           normalize: options.normalize ?? true,
           ...(options.language && { language: options.language }),
           ...(options.wordTimestamps && { word_timestamps: true }),
           ...(options.speed !== undefined && { speed: options.speed }),
           ...(options.projectId !== undefined && { project_id: options.projectId }),
+          // [] is meaningful (explicit opt-out) and must be sent; only
+          // undefined (use the project default) is omitted.
+          ...(options.dictionaryIds !== undefined && { dictionary_ids: options.dictionaryIds }),
         }));
       };
@@ -764,6 +789,7 @@ class TTSResource {
               generationMs: data.gen_ms,
               rtf: data.rtf,
               error: data.error,
+              usage: parseSessionUsage(data) ?? undefined,
             };
             callbacks.onFinal?.(stats);
             ws.close();
@@ -962,7 +988,11 @@ class MultiContextSession {
   private config: import('./types').MultiContextConfig;
   private callbacks: import('./types').MultiContextCallbacks = {};
   private contexts: Set<string> = new Set();
+  /** Contexts a create message has been sent for (not yet necessarily
+   *  confirmed by the server via context_created). */
+  private requestedContexts: Set<string> = new Set();
   private _sessionId: string | null = null;
+  private _contextUsage: Map<string, import('./types').SessionUsage> = new Map();
   private isStarted = false;
   constructor(
@@ -979,6 +1009,20 @@ class MultiContextSession {
     return this._sessionId;
   }
+  /**
+   * Per-context usage (audio time + amount charged) recorded for a context.
+   * Each context is its own conversation — use this to bill per conversation.
+   * See {@link SessionUsage}.
+   *
+   * Usage is stored when a `context_closed` frame that carries usage data is
+   * handled, so the result is null both before the context has closed and
+   * when no usage was recorded for it.
+   *
+   * @param contextId - ID of the context to look up.
+   * @returns The recorded usage, or null if none is stored for `contextId`.
+   */
+  usageFor(contextId: string): import('./types').SessionUsage | null {
+    return this._contextUsage.get(contextId) ?? null;
+  }
+  /**
+   * Map of context_id → per-context usage for every context whose usage has
+   * been recorded. A new Map is built on each access, so mutating the
+   * returned Map does not change the session's internal bookkeeping.
+   */
+  get contextUsage(): Map<string, import('./types').SessionUsage> {
+    return new Map(this._contextUsage);
+  }
   /**
    * Connect to the multi-context WebSocket endpoint.
    *
@@ -1003,7 +1047,7 @@ class MultiContextSession {
       authParam = 'api_key';
     }
-    const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
+    const url = appendSdkQuery(`${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`);
     this.ws = createWs(url);
     const ws = this.ws;
@@ -1048,13 +1092,25 @@ class MultiContextSession {
           this.callbacks.onChunk?.(chunk);
         }
+        if (data.final && data.context_id) {
+          // Per-context end-of-audio marker (KUG-1238): all audio admitted
+          // before the client's flush has been delivered; also precedes
+          // context_closed on a graceful close.
+          this.callbacks.onFinal?.(data.context_id);
+        }
         if (data.context_closed) {
           this.contexts.delete(data.context_id);
-          this.callbacks.onContextClosed?.(data.context_id);
+          this.requestedContexts.delete(data.context_id);
+          // Per-context (per-conversation) usage rides on context_closed.
+          const ctxUsage = parseSessionUsage(data) ?? undefined;
+          if (ctxUsage) this._contextUsage.set(data.context_id, ctxUsage);
+          this.callbacks.onContextClosed?.(data.context_id, ctxUsage);
         }
         if (data.context_timeout) {
           this.contexts.delete(data.context_id);
+          this.requestedContexts.delete(data.context_id);
           this.callbacks.onContextTimeout?.(data.context_id);
         }
@@ -1109,6 +1165,7 @@ class MultiContextSession {
         this.ws = null;
         this.isStarted = false;
         this.contexts.clear();
+        this.requestedContexts.clear();
       };
     });
   }
@@ -1126,6 +1183,7 @@ class MultiContextSession {
     if (!this.ws || this.ws.readyState !== WS_OPEN) {
       throw new KugelAudioError('WebSocket not connected');
     }
+    this.requestedContexts.add(contextId);
     const msg: Record<string, unknown> = {
       text: ' ',
@@ -1136,26 +1194,36 @@ class MultiContextSession {
     if (!this.isStarted) {
       warnIfNoLanguage(this.config.language, this.config.normalize);
       if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
+      if (this.config.outputFormat) msg.output_format = this.config.outputFormat;
       if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
       if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
       if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
       if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
       if (this.config.language) msg.language = this.config.language;
+      // [] is meaningful (explicit opt-out) and must be sent; only
+      // undefined (use the project default) is omitted.
+      if (this.config.dictionaryIds !== undefined) msg.dictionary_ids = this.config.dictionaryIds;
       if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
     }
-    // Per-context voice
+    // Per-context voice. The server binds a context's voice ONLY from
+    // voice_settings.voice_id at context creation — a top-level voice_id
+    // merely updates the session config and leaves the context voiceless,
+    // which the server rejects with MISSING_VOICE_ID on the first text
+    // (KUG-1233). This matches the Python SDK's wire format.
+    const voiceSettings: Record<string, unknown> = {};
     const voiceId = options?.voiceId || this.config.defaultVoiceId;
-    if (voiceId) msg.voice_id = voiceId;
+    if (voiceId) voiceSettings.voice_id = voiceId;
     if (options?.voiceSettings) {
-      msg.voice_settings = {
-        stability: options.voiceSettings.stability,
-        similarity_boost: options.voiceSettings.similarityBoost,
-        style: options.voiceSettings.style,
-        use_speaker_boost: options.voiceSettings.useSpeakerBoost,
-        speed: options.voiceSettings.speed,
-      };
+      voiceSettings.stability = options.voiceSettings.stability;
+      voiceSettings.similarity_boost = options.voiceSettings.similarityBoost;
+      voiceSettings.style = options.voiceSettings.style;
+      voiceSettings.use_speaker_boost = options.voiceSettings.useSpeakerBoost;
+      voiceSettings.speed = options.voiceSettings.speed;
+    }
+    if (Object.keys(voiceSettings).length > 0) {
+      msg.voice_settings = voiceSettings;
     }
     this.ws.send(JSON.stringify(msg));
@@ -1169,8 +1237,12 @@ class MultiContextSession {
       throw new KugelAudioError('WebSocket not connected');
     }
-    // Auto-create context if needed
-    if (!this.contexts.has(contextId) && !this.isStarted) {
+    // Auto-create context if needed. Tracked via requestedContexts (sent
+    // creates, not yet necessarily confirmed) rather than this.contexts
+    // (server-confirmed) — otherwise a send() to a new context after the
+    // session started goes out bare, and the server auto-creates the
+    // context without voice_settings → MISSING_VOICE_ID (KUG-1233).
+    if (!this.requestedContexts.has(contextId) && !this.contexts.has(contextId)) {
       this.createContext(contextId);
     }
@@ -1236,6 +1308,7 @@ class MultiContextSession {
     this.ws = null;
     this.isStarted = false;
     this.contexts.clear();
+    this.requestedContexts.clear();
   }
   /**
@@ -1288,6 +1361,7 @@ class StreamingSession {
   private callbacks: StreamingSessionCallbacks;
   private client: KugelAudio;
   private configSent = false;
+  private _lastUsage: import('./types').SessionUsage | null = null;
   constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks) {
     this.client = client;
@@ -1295,6 +1369,15 @@ class StreamingSession {
     this.callbacks = callbacks;
   }
+  /**
+   * Per-session usage from the most recently closed session, or null before
+   * the first session closes. Use this to bill your own customers per
+   * conversation. See {@link SessionUsage}.
+   *
+   * The value is overwritten every time a `session_closed` frame is handled,
+   * and is set back to null when that frame carries no usable usage data.
+   */
+  get lastUsage(): import('./types').SessionUsage | null {
+    return this._lastUsage;
+  }
   /**
    * Open the WebSocket connection and authenticate.
    *
@@ -1317,7 +1400,7 @@ class StreamingSession {
       authParam = 'api_key';
     }
-    const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
+    const url = appendSdkQuery(`${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`);
     this.ws = createWs(url);
     const ws = this.ws;
@@ -1374,7 +1457,18 @@ class StreamingSession {
           this.callbacks.onInterrupted?.();
         }
+        if (data.final) {
+          // End-of-audio marker for the turn (KUG-1238) — arrives after
+          // the last audio frame and before session_closed.
+          this.callbacks.onFinal?.(
+            data.total_audio_seconds ?? 0,
+            data.total_text_chunks ?? 0,
+            data.total_audio_chunks ?? 0,
+          );
+        }
         if (data.session_closed) {
+          this._lastUsage = parseSessionUsage(data);
           this.callbacks.onSessionClosed?.(
             data.total_audio_seconds ?? 0,
             data.total_text_chunks ?? 0,
@@ -1459,6 +1553,7 @@ class StreamingSession {
       if (this.config.temperature !== undefined) msg.temperature = this.config.temperature;
       if (this.config.maxNewTokens !== undefined) msg.max_new_tokens = this.config.maxNewTokens;
       if (this.config.sampleRate !== undefined) msg.sample_rate = this.config.sampleRate;
+      if (this.config.outputFormat !== undefined) msg.output_format = this.config.outputFormat;
       if (this.config.flushTimeoutMs !== undefined) msg.flush_timeout_ms = this.config.flushTimeoutMs;
       if (this.config.maxBufferLength !== undefined) msg.max_buffer_length = this.config.maxBufferLength;
       if (this.config.normalize !== undefined) msg.normalize = this.config.normalize;
@@ -1467,6 +1562,9 @@ class StreamingSession {
       if (this.config.autoMode !== undefined) msg.auto_mode = this.config.autoMode;
       if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
       if (this.config.speed !== undefined) msg.speed = this.config.speed;
+      // [] is meaningful (explicit opt-out) and must be sent; only
+      // undefined (use the project default) is omitted.
+      if (this.config.dictionaryIds !== undefined) msg.dictionary_ids = this.config.dictionaryIds;
       this.configSent = true;
     }
@@ -1865,6 +1963,7 @@ export class KugelAudio {
       'Content-Type': 'application/json',
       'X-API-Key': this._apiKey,
       'Authorization': `Bearer ${this._apiKey}`,
+      ...sdkHeaders(),
     };
     const controller = new AbortController();
@@ -1913,6 +2012,7 @@ export class KugelAudio {
     const headers: Record<string, string> = {
       'X-API-Key': this._apiKey,
       'Authorization': `Bearer ${this._apiKey}`,
+      ...sdkHeaders(),
     };
     const controller = new AbortController();

package/src/index.ts CHANGED Viewed

@@ -62,6 +62,7 @@ export type {
     MultiContextAudioChunk,
     MultiContextCallbacks,
     MultiContextConfig,
+    SessionUsage,
     StreamCallbacks,
     StreamConfig,
     StreamingSessionCallbacks,
@@ -78,6 +79,7 @@ export type {
     VoiceSex,
     WordTimestamp
 } from './types';
+export { parseSessionUsage } from './types';
 export { DictionariesResource, DictionaryEntriesResource } from './dictionaries';

package/src/types.ts CHANGED Viewed

@@ -240,7 +240,7 @@ export interface WordTimestamp {
 export interface GenerateOptions {
   /** Text to synthesize */
   text: string;
-  /** Model to use: 'kugel-1-turbo' (fast) or 'kugel-1' (premium). Default: 'kugel-1-turbo' */
+  /** Model to use. Default: 'kugel-3'. Legacy ids (kugel-2.5, kugel-1-turbo, …) still accepted; they alias to kugel-3 server-side. */
   modelId?: string;
   /** Voice ID to use */
   voiceId?: number;
@@ -258,7 +258,13 @@ export interface GenerateOptions {
   maxNewTokens?: number;
   /** Output sample rate (default: 24000) */
   sampleRate?: number;
-  /**
+  /**
+   * Combined codec+rate token, e.g. 'ulaw_8000' / 'alaw_8000' / 'pcm_8000'.
+   * Opt-in; when set it is authoritative and must not contradict sampleRate.
+   * Absent ⇒ legacy PCM16 at sampleRate.
+   */
+  outputFormat?: string;
+  /**
    * Enable text normalization (converts numbers, dates, etc. to spoken words).
    * When true, text will be normalized before TTS generation.
    * Default: true
@@ -286,8 +292,8 @@ export interface GenerateOptions {
   /**
    * Playback speed multiplier (0.8 = slower, 1.0 = normal, 1.2 = faster).
    *
-   * Uses pitch-preserving time-stretching (WSOLA). Inline `<prosody rate="...">` tags
-   * can also be used for per-segment speed control.
+   * Uses pitch-preserving time-stretching (WSOLA); applies uniformly to the
+   * whole request (no per-span control).
    * Range: [0.8, 1.2]. Default: 1.0.
    */
   speed?: number;
@@ -298,6 +304,14 @@ export interface GenerateOptions {
    * server treats the value as trusted once received.
    */
   projectId?: number;
+  /**
+   * Per-request dictionary selection. Omit for the default behavior (all
+   * active dictionaries of the project apply, filtered by language). An
+   * empty array disables dictionaries for this request. A list of
+   * dictionary IDs applies exactly those dictionaries — including
+   * inactive ones — bypassing the language filter.
+   */
+  dictionaryIds?: number[];
 }
 /**
@@ -320,7 +334,7 @@ export interface GenerateOptions {
 export interface StreamConfig {
   /** Voice ID to use */
   voiceId?: number;
-  /** Model ID ('kugel-1-turbo' or 'kugel-1'). Default: 'kugel-1-turbo' */
+  /** Model ID. Default: 'kugel-3'. Legacy ids still accepted; they alias to kugel-3 server-side. */
   modelId?: string;
   /** CFG scale for generation */
   cfgScale?: number;
@@ -333,6 +347,8 @@ export interface StreamConfig {
   maxNewTokens?: number;
   /** Output sample rate */
   sampleRate?: number;
+  /** Combined codec+rate token (e.g. 'ulaw_8000'); opt-in, set-once per session. */
+  outputFormat?: string;
   /** Auto-flush timeout in milliseconds */
   flushTimeoutMs?: number;
   /** Maximum buffer length */
@@ -377,11 +393,19 @@ export interface StreamConfig {
   /**
    * Playback speed multiplier (0.8 = slower, 1.0 = normal, 1.2 = faster).
    *
-   * Uses pitch-preserving time-stretching (WSOLA). Inline `<prosody rate="...">` tags
-   * can also be used for per-segment speed control.
+   * Uses pitch-preserving time-stretching (WSOLA); applies uniformly to the
+   * whole request (no per-span control).
    * Range: [0.8, 1.2]. Default: 1.0.
    */
   speed?: number;
+  /**
+   * Per-request dictionary selection. Omit for the default behavior (all
+   * active dictionaries of the project apply, filtered by language). An
+   * empty array disables dictionaries for this request. A list of
+   * dictionary IDs applies exactly those dictionaries — including
+   * inactive ones — bypassing the language filter.
+   */
+  dictionaryIds?: number[];
 }
 /**
@@ -399,9 +423,18 @@ export interface StreamingSessionCallbacks {
    * Carries the segment index, total audio duration, and generation time.
    */
   onChunkComplete?: (chunkId: number, audioSeconds: number, genMs: number) => void;
+  /**
+   * Called when the server marks the end of a turn's audio
+   * (`{"final": true, ...}` — sent after the last audio frame of every
+   * gracefully completed turn, right before `session_closed`). The
+   * ElevenLabs `isFinal` equivalent: once this fires, no further audio
+   * for the turn will arrive. Not fired on a barge-in cancel — that
+   * path fires {@link onInterrupted} instead.
+   */
+  onFinal?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
   /**
    * Called when the session is fully closed (after `session.close()`).
-   * Equivalent to `onFinal` on the one-shot endpoint.
+   * Fires right after {@link onFinal} and additionally carries usage.
    */
   onSessionClosed?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
   /** Called when the server begins generating audio for a text segment. */
@@ -419,14 +452,71 @@ export interface StreamingSessionCallbacks {
   onError?: (error: Error) => void;
 }
+/**
+ * Per-session usage reported in the `session_closed` frame (KUG-1192).
+ *
+ * Lets you bill your own customers per conversation. `costCents` is the
+ * actual amount charged in **EUR cents**. When the charge could not be
+ * determined at session end (e.g. a transient billing error) `costCents` is
+ * `null` and `costAvailable` is `false` — never a misleading `0`.
+ * `audioSeconds` is always reported. On `/ws/tts/multi` usage is reported per
+ * context (per conversation) on each `context_closed` frame, not aggregated
+ * across contexts.
+ *
+ * Instances are produced by `parseSessionUsage`, which sets `costAvailable`
+ * to `costCents !== null`.
+ */
+export interface SessionUsage {
+  /** Total audio generated this session, in seconds (the unit we bill on). */
+  audioSeconds: number;
+  /** Actual amount charged in EUR cents, or `null` if undetermined. */
+  costCents: number | null;
+  /**
+   * Currency of `costCents` (`"eur"`); present only when `costCents` is set.
+   * NOTE(review): `parseSessionUsage` copies any string `currency` without
+   * checking `costCents` — confirm the server omits it when the cost is
+   * undetermined.
+   */
+  currency?: string;
+  /** Total input characters submitted this session, if reported. */
+  characters?: number;
+  /** Model that produced the audio, if reported. */
+  modelId?: string;
+  /** `true` when an authoritative charge was returned for this session. */
+  costAvailable: boolean;
+}
+/**
+ * Parse a raw server payload into a typed {@link SessionUsage}.
+ *
+ * Fields are read from `data.usage` when that is an object; otherwise (a
+ * legacy `session_closed` payload without one) they are read from `data`
+ * itself. Audio seconds come from `audio_seconds` on that source, falling
+ * back to the top-level `data.total_audio_seconds`. Values of an unexpected
+ * type are ignored rather than coerced.
+ *
+ * @param data - Parsed message payload, with or without a nested `usage` object.
+ * @returns The typed usage, or `null` when no numeric audio-seconds value is
+ *   present in either location.
+ */
+export function parseSessionUsage(
+  data: Record<string, unknown>,
+): SessionUsage | null {
+  const raw = data.usage as Record<string, unknown> | undefined;
+  const source = raw && typeof raw === 'object' ? raw : data; // prefer the nested usage object when there is one
+  const audioSeconds =
+    typeof source.audio_seconds === 'number'
+      ? source.audio_seconds
+      : typeof data.total_audio_seconds === 'number'
+        ? data.total_audio_seconds
+        : undefined;
+  if (audioSeconds === undefined) return null; // no audio duration anywhere: report no usage
+  const costCents =
+    typeof source.cost_cents === 'number' ? source.cost_cents : null; // null, not 0, when the cost is missing
+  return {
+    audioSeconds,
+    costCents,
+    currency:
+      typeof source.currency === 'string' ? source.currency : undefined,
+    characters:
+      typeof source.characters === 'number' ? source.characters : undefined,
+    modelId: typeof source.model_id === 'string' ? source.model_id : undefined,
+    costAvailable: costCents !== null,
+  };
+}
 /**
  * Audio chunk from streaming TTS.
  */
 export interface AudioChunk {
   /** Raw PCM16 audio as base64 */
   audio: string;
-  /** Encoding format */
-  encoding: 'pcm_s16le';
+  /** Encoding format. 'mulaw' / 'alaw' only when output_format requested G.711. */
+  encoding: 'pcm_s16le' | 'mulaw' | 'alaw';
   /** Chunk index */
   index: number;
   /** Sample rate */
@@ -453,6 +543,12 @@ export interface GenerationStats {
   rtf: number;
   /** Error message if any */
   error?: string;
+  /**
+   * Per-request usage (audio time + amount charged), for billing your own
+   * customers. Undefined when the server reports no usage. See
+   * {@link SessionUsage}.
+   */
+  usage?: SessionUsage;
 }
 /**
@@ -546,6 +642,8 @@ export interface MultiContextConfig {
   defaultVoiceId?: number;
   /** Output sample rate (default: 24000) */
   sampleRate?: number;
+  /** Combined codec+rate token (e.g. 'ulaw_8000'); opt-in, set-once per context. */
+  outputFormat?: string;
   /** CFG scale for generation (default: 2.0) */
   cfgScale?: number;
   /**
@@ -563,6 +661,14 @@ export interface MultiContextConfig {
    * the language, which adds ~60-150ms to time-to-first-audio.
    */
   language?: string;
+  /**
+   * Per-request dictionary selection. Omit for the default behavior (all
+   * active dictionaries of the project apply, filtered by language). An
+   * empty array disables dictionaries for this request. A list of
+   * dictionary IDs applies exactly those dictionaries — including
+   * inactive ones — bypassing the language filter.
+   */
+  dictionaryIds?: number[];
   /** Seconds before context auto-closes (default: 20.0) */
   inactivityTimeout?: number;
 }
@@ -601,8 +707,20 @@ export interface MultiContextCallbacks {
   onContextCreated?: (contextId: string) => void;
   /** Called when an audio chunk is received */
   onChunk?: (chunk: MultiContextAudioChunk) => void;
-  /** Called when a context is closed */
-  onContextClosed?: (contextId: string) => void;
+  /**
+   * Called when all audio admitted before a `{flush: true}` has been
+   * delivered for a context (`{"final": true, "context_id": ...}`), and
+   * once more before {@link onContextClosed} on a graceful close. The
+   * ElevenLabs multi-context `is_final` equivalent. Not fired on an
+   * immediate (barge-in) close.
+   */
+  onFinal?: (contextId: string) => void;
+  /**
+   * Called when a context is closed (terminal). `usage` carries this
+   * conversation's audio time + amount charged (undefined if not reported).
+   * See {@link SessionUsage}.
+   */
+  onContextClosed?: (contextId: string, usage?: SessionUsage) => void;
   /** Called when a context times out */
   onContextTimeout?: (contextId: string) => void;
   /** Called when session is closed */