npm - kugelaudio - Versions diffs - 0.6.1 → 0.8.0 - Mend

kugelaudio 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,27 @@
+## [kugelaudio-v0.8.0](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.7.0...js-sdk-v0.8.0) (2026-06-10)
+### Features
+* **ingress,python-sdk,js-sdk,java-sdk:** per-session usage over WebSocket ([#1346](https://github.com/Kugelaudio/KugelAudio/issues/1346)) ([2881881](https://github.com/Kugelaudio/KugelAudio/commit/28818816dca9c8d222391691d70f458c0eb28ed8))
+* **ingress,python-sdk,js-sdk:** streaming final end-of-audio frame ([#1362](https://github.com/Kugelaudio/KugelAudio/issues/1362)) ([3fa95d2](https://github.com/Kugelaudio/KugelAudio/commit/3fa95d2f8597e6c9ced0aaf8370682dbcb123c71))
+* **ingress:** output_format token + server-side G.711 (KUG-1190) ([#1345](https://github.com/Kugelaudio/KugelAudio/issues/1345)) ([3723291](https://github.com/Kugelaudio/KugelAudio/commit/372329196c4c91aa41fe2111783872874b6e895b))
+* per-request dictionary selection (KUG-1094) ([#1361](https://github.com/Kugelaudio/KugelAudio/issues/1361)) ([3c28968](https://github.com/Kugelaudio/KugelAudio/commit/3c28968d32018bf3cafe1d312f32831668ea96b8))
+### Bug Fixes
+* **js-sdk,java-sdk,ingress:** multi-turn conversations work end-to-end + live SDK e2e bench in CI (KUG-1233) ([#1363](https://github.com/Kugelaudio/KugelAudio/issues/1363)) ([c0ed2a9](https://github.com/Kugelaudio/KugelAudio/commit/c0ed2a9cf41025bac5c7182c1a281eb600d8dd36))
+## [kugelaudio-v0.7.0](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.6.1...js-sdk-v0.7.0) (2026-06-06)
+### Features
+* **ingress:** add request observability metadata ([#1321](https://github.com/Kugelaudio/KugelAudio/issues/1321)) ([a9c5178](https://github.com/Kugelaudio/KugelAudio/commit/a9c5178193cb8b746a8bbd9b566b11f7b1d00f6d))
+* **sdks:** default all SDKs to kugel-3 model ([#1323](https://github.com/Kugelaudio/KugelAudio/issues/1323)) ([c4de212](https://github.com/Kugelaudio/KugelAudio/commit/c4de212c91e16326a15dbee5622acacc83ed85bb))
+### Bug Fixes
+* **js-sdk:** type SDK metadata fetch mock ([#1334](https://github.com/Kugelaudio/KugelAudio/issues/1334)) ([e8f6f59](https://github.com/Kugelaudio/KugelAudio/commit/e8f6f59595e123eaae8b44670c94fb4e7bc8d06c))
 ## [kugelaudio-v0.6.1](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.6.0...js-sdk-v0.6.1) (2026-06-04)
 ### Bug Fixes

package/README.md CHANGED Viewed

@@ -173,6 +173,7 @@ const audio = await client.tts.generate({
   cfgScale: 2.0,             // Guidance scale (1.0-5.0)
   maxNewTokens: 2048,        // Maximum tokens to generate
   sampleRate: 24000,         // Output sample rate
+  outputFormat: undefined,   // Optional: 'pcm_24000', 'ulaw_8000', 'alaw_8000', ...
   normalize: true,           // Enable text normalization (see below)
   language: 'en',            // Language for normalization
 });
@@ -298,12 +299,19 @@ for await (const token of llmTokenStream) {
 // Triggers the server-side final flush of any trailing text,
 // streams the resulting audio through onChunk, then closes the WS.
 await session.close();
+// Per-session usage — bill your own customers per conversation.
+// `costCents` is the actual charge in EUR cents (null if undetermined).
+const usage = session.lastUsage;
+if (usage) {
+  console.log(`audio: ${usage.audioSeconds}s, cost: ${usage.costCents ?? 'n/a'} ct`);
+}
 ```
 > ⚠️ **Do not call `session.send(text, true)` (`flush=true`) between
 > sentences or words.** Each explicit flush is a separate TTS request
 > that pays the full model time-to-first-audio (TTFA) again and produces
-> an audible gap. See [Streaming best practices](https://docs.kugelaudio.com/streaming-best-practices)
+> an audible gap. See [Chunking & per-segment latency](https://docs.kugelaudio.com/streaming/chunking-and-latency)
 > for the full rationale, chunk-size ordering, and ElevenLabs migration
 > notes.
@@ -413,6 +421,7 @@ interface GenerateOptions {
   cfgScale?: number;       // Default: 2.0
   maxNewTokens?: number;   // Default: 2048
   sampleRate?: number;     // Default: 24000
+  outputFormat?: string;   // 'pcm_8000' | 'pcm_16000' | 'pcm_22050' | 'pcm_24000' | 'ulaw_8000' | 'alaw_8000'
   normalize?: boolean;     // Default: true - Enable text normalization
   language?: string;       // ISO 639-1 code for normalization (e.g., 'en', 'de')
 }
@@ -425,7 +434,7 @@ interface GenerateOptions {
 ```typescript
 interface AudioChunk {
   audio: string;       // Base64-encoded audio (PCM16 by default; G.711 when outputFormat set)
-  encoding: string;    // 'pcm_s16le'
+  encoding: string;    // 'pcm_s16le' | 'mulaw' | 'alaw' (G.711 when outputFormat set)
   index: number;       // Chunk index (0-based)
   sampleRate: number;  // Sample rate (24000)
   samples: number;     // Number of samples in chunk

package/dist/index.d.mts CHANGED Viewed

@@ -217,7 +217,7 @@ interface WordTimestamp {
 interface GenerateOptions {
     /** Text to synthesize */
     text: string;
-    /** Model to use: 'kugel-1-turbo' (fast) or 'kugel-1' (premium). Default: 'kugel-1-turbo' */
+    /** Model to use. Default: 'kugel-3'. Legacy ids (kugel-2.5, kugel-1-turbo, …) still accepted; they alias to kugel-3 server-side. */
     modelId?: string;
     /** Voice ID to use */
     voiceId?: number;
@@ -235,6 +235,12 @@ interface GenerateOptions {
     maxNewTokens?: number;
     /** Output sample rate (default: 24000) */
     sampleRate?: number;
+    /**
+     * Combined codec+rate token, e.g. 'ulaw_8000' / 'alaw_8000' / 'pcm_8000'.
+     * Opt-in; when set it is authoritative and must not contradict sampleRate.
+     * Absent ⇒ legacy PCM16 at sampleRate.
+     */
+    outputFormat?: string;
     /**
      * Enable text normalization (converts numbers, dates, etc. to spoken words).
      * When true, text will be normalized before TTS generation.
@@ -263,8 +269,8 @@ interface GenerateOptions {
     /**
      * Playback speed multiplier (0.8 = slower, 1.0 = normal, 1.2 = faster).
      *
-     * Uses pitch-preserving time-stretching (WSOLA). Inline `<prosody rate="...">` tags
-     * can also be used for per-segment speed control.
+     * Uses pitch-preserving time-stretching (WSOLA); applies uniformly to the
+     * whole request (no per-span control).
      * Range: [0.8, 1.2]. Default: 1.0.
      */
     speed?: number;
@@ -275,6 +281,14 @@ interface GenerateOptions {
      * server treats the value as trusted once received.
      */
     projectId?: number;
+    /**
+     * Per-request dictionary selection. Omit for the default behavior (all
+     * active dictionaries of the project apply, filtered by language). An
+     * empty array disables dictionaries for this request. A list of
+     * dictionary IDs applies exactly those dictionaries — including
+     * inactive ones — bypassing the language filter.
+     */
+    dictionaryIds?: number[];
 }
 /**
  * Streaming session configuration for `/ws/tts/stream`.
@@ -296,7 +310,7 @@ interface GenerateOptions {
 interface StreamConfig {
     /** Voice ID to use */
     voiceId?: number;
-    /** Model ID ('kugel-1-turbo' or 'kugel-1'). Default: 'kugel-1-turbo' */
+    /** Model ID. Default: 'kugel-3'. Legacy ids still accepted; they alias to kugel-3 server-side. */
     modelId?: string;
     /** CFG scale for generation */
     cfgScale?: number;
@@ -309,6 +323,8 @@ interface StreamConfig {
     maxNewTokens?: number;
     /** Output sample rate */
     sampleRate?: number;
+    /** Combined codec+rate token (e.g. 'ulaw_8000'); opt-in, set-once per session. */
+    outputFormat?: string;
     /** Auto-flush timeout in milliseconds */
     flushTimeoutMs?: number;
     /** Maximum buffer length */
@@ -353,11 +369,19 @@ interface StreamConfig {
     /**
      * Playback speed multiplier (0.8 = slower, 1.0 = normal, 1.2 = faster).
      *
-     * Uses pitch-preserving time-stretching (WSOLA). Inline `<prosody rate="...">` tags
-     * can also be used for per-segment speed control.
+     * Uses pitch-preserving time-stretching (WSOLA); applies uniformly to the
+     * whole request (no per-span control).
      * Range: [0.8, 1.2]. Default: 1.0.
      */
     speed?: number;
+    /**
+     * Per-request dictionary selection. Omit for the default behavior (all
+     * active dictionaries of the project apply, filtered by language). An
+     * empty array disables dictionaries for this request. A list of
+     * dictionary IDs applies exactly those dictionaries — including
+     * inactive ones — bypassing the language filter.
+     */
+    dictionaryIds?: number[];
 }
 /**
  * Event callbacks for a streaming session (`/ws/tts/stream`).
@@ -374,9 +398,18 @@ interface StreamingSessionCallbacks {
      * Carries the segment index, total audio duration, and generation time.
      */
     onChunkComplete?: (chunkId: number, audioSeconds: number, genMs: number) => void;
+    /**
+     * Called when the server marks the end of a turn's audio
+     * (`{"final": true, ...}` — sent after the last audio frame of every
+     * gracefully completed turn, right before `session_closed`). The
+     * ElevenLabs `isFinal` equivalent: once this fires, no further audio
+     * for the turn will arrive. Not fired on a barge-in cancel — that
+     * path fires {@link onInterrupted} instead.
+     */
+    onFinal?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
     /**
      * Called when the session is fully closed (after `session.close()`).
-     * Equivalent to `onFinal` on the one-shot endpoint.
+     * Fires right after {@link onFinal}. Usage is not a parameter of this callback; read it from {@link StreamingSession.lastUsage} once the session has closed.
      */
     onSessionClosed?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
     /** Called when the server begins generating audio for a text segment. */
@@ -393,14 +426,45 @@ interface StreamingSessionCallbacks {
     /** Called on any error. */
     onError?: (error: Error) => void;
 }
+/**
+ * Per-session usage reported in the `session_closed` frame (KUG-1192).
+ *
+ * Lets you bill your own customers per conversation. `costCents` is the
+ * actual amount charged in **EUR cents**. When the charge could not be
+ * determined at session end (e.g. a transient billing error) `costCents` is
+ * `null` and `costAvailable` is `false` — never a misleading `0`.
+ * `audioSeconds` is always reported. On `/ws/tts/multi` usage is reported per
+ * context (per conversation) on each `context_closed` frame, not aggregated
+ * across contexts.
+ */
+interface SessionUsage {
+    /** Total audio generated this session, in seconds (the unit we bill on). */
+    audioSeconds: number;
+    /** Actual amount charged in EUR cents, or `null` if undetermined. */
+    costCents: number | null;
+    /** Currency of `costCents` (`"eur"`); present only when `costCents` is set. */
+    currency?: string;
+    /** Total input characters submitted this session, if reported. */
+    characters?: number;
+    /** Model that produced the audio, if reported. */
+    modelId?: string;
+    /** `true` when an authoritative charge was returned for this session. */
+    costAvailable: boolean;
+}
+/**
+ * Parse the raw `usage` object (or a legacy `session_closed` payload without
+ * one) into a typed {@link SessionUsage}. Returns `null` when no usage info
+ * is present.
+ */
+declare function parseSessionUsage(data: Record<string, unknown>): SessionUsage | null;
 /**
  * Audio chunk from streaming TTS.
  */
 interface AudioChunk {
     /** Raw audio as base64: PCM16 by default, G.711 ('mulaw' / 'alaw') when output_format requests it */
     audio: string;
-    /** Encoding format */
-    encoding: 'pcm_s16le';
+    /** Encoding format. 'mulaw' / 'alaw' only when output_format requested G.711. */
+    encoding: 'pcm_s16le' | 'mulaw' | 'alaw';
     /** Chunk index */
     index: number;
     /** Sample rate */
@@ -426,6 +490,12 @@ interface GenerationStats {
     rtf: number;
     /** Error message if any */
     error?: string;
+    /**
+     * Per-request usage (audio time + amount charged), for billing your own
+     * customers. Undefined when the server reports no usage. See
+     * {@link SessionUsage}.
+     */
+    usage?: SessionUsage;
 }
 /**
  * Complete audio response from TTS generation.
@@ -505,6 +575,8 @@ interface MultiContextConfig {
     defaultVoiceId?: number;
     /** Output sample rate (default: 24000) */
     sampleRate?: number;
+    /** Combined codec+rate token (e.g. 'ulaw_8000'); opt-in, set-once per context. */
+    outputFormat?: string;
     /** CFG scale for generation (default: 2.0) */
     cfgScale?: number;
     /**
@@ -522,6 +594,14 @@ interface MultiContextConfig {
      * the language, which adds ~60-150ms to time-to-first-audio.
      */
     language?: string;
+    /**
+     * Per-request dictionary selection. Omit for the default behavior (all
+     * active dictionaries of the project apply, filtered by language). An
+     * empty array disables dictionaries for this request. A list of
+     * dictionary IDs applies exactly those dictionaries — including
+     * inactive ones — bypassing the language filter.
+     */
+    dictionaryIds?: number[];
     /** Seconds before context auto-closes (default: 20.0) */
     inactivityTimeout?: number;
 }
@@ -557,8 +637,20 @@ interface MultiContextCallbacks {
     onContextCreated?: (contextId: string) => void;
     /** Called when an audio chunk is received */
     onChunk?: (chunk: MultiContextAudioChunk) => void;
-    /** Called when a context is closed */
-    onContextClosed?: (contextId: string) => void;
+    /**
+     * Called when all audio admitted before a `{flush: true}` has been
+     * delivered for a context (`{"final": true, "context_id": ...}`), and
+     * once more before {@link onContextClosed} on a graceful close. The
+     * ElevenLabs multi-context `is_final` equivalent. Not fired on an
+     * immediate (barge-in) close.
+     */
+    onFinal?: (contextId: string) => void;
+    /**
+     * Called when a context is closed (terminal). `usage` carries this
+     * conversation's audio time + amount charged (undefined if not reported).
+     * See {@link SessionUsage}.
+     */
+    onContextClosed?: (contextId: string, usage?: SessionUsage) => void;
     /** Called when a context times out */
     onContextTimeout?: (contextId: string) => void;
     /** Called when session is closed */
@@ -912,13 +1004,25 @@ declare class MultiContextSession {
     private config;
     private callbacks;
     private contexts;
+    /** Contexts a create message has been sent for (not yet necessarily
+     *  confirmed by the server via context_created). */
+    private requestedContexts;
     private _sessionId;
+    private _contextUsage;
     private isStarted;
     constructor(client: KugelAudio, config?: MultiContextConfig);
     /**
      * Get the current session ID, or null if not connected.
      */
     get sessionId(): string | null;
+    /**
+     * Per-context usage (audio time + amount charged) for a closed context, or
+     * null if that context hasn't closed yet. Each context is its own
+     * conversation — use this to bill per conversation. See {@link SessionUsage}.
+     */
+    usageFor(contextId: string): SessionUsage | null;
+    /** Map of context_id → per-context usage for all closed contexts. */
+    get contextUsage(): Map<string, SessionUsage>;
     /**
      * Connect to the multi-context WebSocket endpoint.
      *
@@ -1005,7 +1109,14 @@ declare class StreamingSession {
     private callbacks;
     private client;
     private configSent;
+    private _lastUsage;
     constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks);
+    /**
+     * Per-session usage from the most recently closed session, or null before
+     * the first session closes. Use this to bill your own customers per
+     * conversation. See {@link SessionUsage}.
+     */
+    get lastUsage(): SessionUsage | null;
     /**
      * Open the WebSocket connection and authenticate.
      *
@@ -1354,4 +1465,4 @@ declare function createWavFile(audio: ArrayBuffer, sampleRate: number): ArrayBuf
  */
 declare function createWavBlob(audio: ArrayBuffer, sampleRate: number): Blob;
-export { type AudioChunk, type AudioResponse, AuthenticationError, type BulkReplaceResult, ConnectionError, type ContextVoiceSettings, type CreateDictionaryOptions, type CreateVoiceOptions, DictionariesResource, type Dictionary, DictionaryEntriesResource, type DictionaryEntry, type DictionaryEntryInput, type DictionaryEntryListResponse, type ErrorCode, ErrorCodes, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioErrorOptions, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, NotFoundError, RateLimitError, type Region, type StreamCallbacks, type StreamConfig, type StreamingSessionCallbacks, type UpdateDictionaryEntryOptions, type UpdateDictionaryOptions, type UpdateVoiceOptions, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceDetail, type VoiceListResponse, type VoiceQuality, type VoiceReference, type VoiceSex, type WordTimestamp, WsCloseCodes, base64ToArrayBuffer, classifyHttpError, classifyWsClose, classifyWsFrame, classifyWsHandshakeError, createWavBlob, createWavFile, decodePCM16 };
+export { type AudioChunk, type AudioResponse, AuthenticationError, type BulkReplaceResult, ConnectionError, type ContextVoiceSettings, type CreateDictionaryOptions, type CreateVoiceOptions, DictionariesResource, type Dictionary, DictionaryEntriesResource, type DictionaryEntry, type DictionaryEntryInput, type DictionaryEntryListResponse, type ErrorCode, ErrorCodes, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioErrorOptions, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, NotFoundError, RateLimitError, type Region, type SessionUsage, type StreamCallbacks, type StreamConfig, type StreamingSessionCallbacks, type UpdateDictionaryEntryOptions, type UpdateDictionaryOptions, type UpdateVoiceOptions, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceDetail, type VoiceListResponse, type VoiceQuality, type VoiceReference, type VoiceSex, type WordTimestamp, WsCloseCodes, base64ToArrayBuffer, classifyHttpError, classifyWsClose, classifyWsFrame, classifyWsHandshakeError, createWavBlob, createWavFile, decodePCM16, parseSessionUsage };

package/dist/index.d.ts CHANGED Viewed

@@ -217,7 +217,7 @@ interface WordTimestamp {
 interface GenerateOptions {
     /** Text to synthesize */
     text: string;
-    /** Model to use: 'kugel-1-turbo' (fast) or 'kugel-1' (premium). Default: 'kugel-1-turbo' */
+    /** Model to use. Default: 'kugel-3'. Legacy ids (kugel-2.5, kugel-1-turbo, …) still accepted; they alias to kugel-3 server-side. */
     modelId?: string;
     /** Voice ID to use */
     voiceId?: number;
@@ -235,6 +235,12 @@ interface GenerateOptions {
     maxNewTokens?: number;
     /** Output sample rate (default: 24000) */
     sampleRate?: number;
+    /**
+     * Combined codec+rate token, e.g. 'ulaw_8000' / 'alaw_8000' / 'pcm_8000'.
+     * Opt-in; when set it is authoritative and must not contradict sampleRate.
+     * Absent ⇒ legacy PCM16 at sampleRate.
+     */
+    outputFormat?: string;
     /**
      * Enable text normalization (converts numbers, dates, etc. to spoken words).
      * When true, text will be normalized before TTS generation.
@@ -263,8 +269,8 @@ interface GenerateOptions {
     /**
      * Playback speed multiplier (0.8 = slower, 1.0 = normal, 1.2 = faster).
      *
-     * Uses pitch-preserving time-stretching (WSOLA). Inline `<prosody rate="...">` tags
-     * can also be used for per-segment speed control.
+     * Uses pitch-preserving time-stretching (WSOLA); applies uniformly to the
+     * whole request (no per-span control).
      * Range: [0.8, 1.2]. Default: 1.0.
      */
     speed?: number;
@@ -275,6 +281,14 @@ interface GenerateOptions {
      * server treats the value as trusted once received.
      */
     projectId?: number;
+    /**
+     * Per-request dictionary selection. Omit for the default behavior (all
+     * active dictionaries of the project apply, filtered by language). An
+     * empty array disables dictionaries for this request. A list of
+     * dictionary IDs applies exactly those dictionaries — including
+     * inactive ones — bypassing the language filter.
+     */
+    dictionaryIds?: number[];
 }
 /**
  * Streaming session configuration for `/ws/tts/stream`.
@@ -296,7 +310,7 @@ interface GenerateOptions {
 interface StreamConfig {
     /** Voice ID to use */
     voiceId?: number;
-    /** Model ID ('kugel-1-turbo' or 'kugel-1'). Default: 'kugel-1-turbo' */
+    /** Model ID. Default: 'kugel-3'. Legacy ids still accepted; they alias to kugel-3 server-side. */
     modelId?: string;
     /** CFG scale for generation */
     cfgScale?: number;
@@ -309,6 +323,8 @@ interface StreamConfig {
     maxNewTokens?: number;
     /** Output sample rate */
     sampleRate?: number;
+    /** Combined codec+rate token (e.g. 'ulaw_8000'); opt-in, set-once per session. */
+    outputFormat?: string;
     /** Auto-flush timeout in milliseconds */
     flushTimeoutMs?: number;
     /** Maximum buffer length */
@@ -353,11 +369,19 @@ interface StreamConfig {
     /**
      * Playback speed multiplier (0.8 = slower, 1.0 = normal, 1.2 = faster).
      *
-     * Uses pitch-preserving time-stretching (WSOLA). Inline `<prosody rate="...">` tags
-     * can also be used for per-segment speed control.
+     * Uses pitch-preserving time-stretching (WSOLA); applies uniformly to the
+     * whole request (no per-span control).
      * Range: [0.8, 1.2]. Default: 1.0.
      */
     speed?: number;
+    /**
+     * Per-request dictionary selection. Omit for the default behavior (all
+     * active dictionaries of the project apply, filtered by language). An
+     * empty array disables dictionaries for this request. A list of
+     * dictionary IDs applies exactly those dictionaries — including
+     * inactive ones — bypassing the language filter.
+     */
+    dictionaryIds?: number[];
 }
 /**
  * Event callbacks for a streaming session (`/ws/tts/stream`).
@@ -374,9 +398,18 @@ interface StreamingSessionCallbacks {
      * Carries the segment index, total audio duration, and generation time.
      */
     onChunkComplete?: (chunkId: number, audioSeconds: number, genMs: number) => void;
+    /**
+     * Called when the server marks the end of a turn's audio
+     * (`{"final": true, ...}` — sent after the last audio frame of every
+     * gracefully completed turn, right before `session_closed`). The
+     * ElevenLabs `isFinal` equivalent: once this fires, no further audio
+     * for the turn will arrive. Not fired on a barge-in cancel — that
+     * path fires {@link onInterrupted} instead.
+     */
+    onFinal?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
     /**
      * Called when the session is fully closed (after `session.close()`).
-     * Equivalent to `onFinal` on the one-shot endpoint.
+     * Fires right after {@link onFinal}. Usage is not a parameter of this callback; read it from {@link StreamingSession.lastUsage} once the session has closed.
      */
     onSessionClosed?: (totalAudioSeconds: number, totalTextChunks: number, totalAudioChunks: number) => void;
     /** Called when the server begins generating audio for a text segment. */
@@ -393,14 +426,45 @@ interface StreamingSessionCallbacks {
     /** Called on any error. */
     onError?: (error: Error) => void;
 }
+/**
+ * Per-session usage reported in the `session_closed` frame (KUG-1192).
+ *
+ * Lets you bill your own customers per conversation. `costCents` is the
+ * actual amount charged in **EUR cents**. When the charge could not be
+ * determined at session end (e.g. a transient billing error) `costCents` is
+ * `null` and `costAvailable` is `false` — never a misleading `0`.
+ * `audioSeconds` is always reported. On `/ws/tts/multi` usage is reported per
+ * context (per conversation) on each `context_closed` frame, not aggregated
+ * across contexts.
+ */
+interface SessionUsage {
+    /** Total audio generated this session, in seconds (the unit we bill on). */
+    audioSeconds: number;
+    /** Actual amount charged in EUR cents, or `null` if undetermined. */
+    costCents: number | null;
+    /** Currency of `costCents` (`"eur"`); present only when `costCents` is set. */
+    currency?: string;
+    /** Total input characters submitted this session, if reported. */
+    characters?: number;
+    /** Model that produced the audio, if reported. */
+    modelId?: string;
+    /** `true` when an authoritative charge was returned for this session. */
+    costAvailable: boolean;
+}
+/**
+ * Parse the raw `usage` object (or a legacy `session_closed` payload without
+ * one) into a typed {@link SessionUsage}. Returns `null` when no usage info
+ * is present.
+ */
+declare function parseSessionUsage(data: Record<string, unknown>): SessionUsage | null;
 /**
  * Audio chunk from streaming TTS.
  */
 interface AudioChunk {
     /** Raw audio as base64: PCM16 by default, G.711 ('mulaw' / 'alaw') when output_format requests it */
     audio: string;
-    /** Encoding format */
-    encoding: 'pcm_s16le';
+    /** Encoding format. 'mulaw' / 'alaw' only when output_format requested G.711. */
+    encoding: 'pcm_s16le' | 'mulaw' | 'alaw';
     /** Chunk index */
     index: number;
     /** Sample rate */
@@ -426,6 +490,12 @@ interface GenerationStats {
     rtf: number;
     /** Error message if any */
     error?: string;
+    /**
+     * Per-request usage (audio time + amount charged), for billing your own
+     * customers. Undefined when the server reports no usage. See
+     * {@link SessionUsage}.
+     */
+    usage?: SessionUsage;
 }
 /**
  * Complete audio response from TTS generation.
@@ -505,6 +575,8 @@ interface MultiContextConfig {
     defaultVoiceId?: number;
     /** Output sample rate (default: 24000) */
     sampleRate?: number;
+    /** Combined codec+rate token (e.g. 'ulaw_8000'); opt-in, set-once per context. */
+    outputFormat?: string;
     /** CFG scale for generation (default: 2.0) */
     cfgScale?: number;
     /**
@@ -522,6 +594,14 @@ interface MultiContextConfig {
      * the language, which adds ~60-150ms to time-to-first-audio.
      */
     language?: string;
+    /**
+     * Per-request dictionary selection. Omit for the default behavior (all
+     * active dictionaries of the project apply, filtered by language). An
+     * empty array disables dictionaries for this request. A list of
+     * dictionary IDs applies exactly those dictionaries — including
+     * inactive ones — bypassing the language filter.
+     */
+    dictionaryIds?: number[];
     /** Seconds before context auto-closes (default: 20.0) */
     inactivityTimeout?: number;
 }
@@ -557,8 +637,20 @@ interface MultiContextCallbacks {
     onContextCreated?: (contextId: string) => void;
     /** Called when an audio chunk is received */
     onChunk?: (chunk: MultiContextAudioChunk) => void;
-    /** Called when a context is closed */
-    onContextClosed?: (contextId: string) => void;
+    /**
+     * Called when all audio admitted before a `{flush: true}` has been
+     * delivered for a context (`{"final": true, "context_id": ...}`), and
+     * once more before {@link onContextClosed} on a graceful close. The
+     * ElevenLabs multi-context `is_final` equivalent. Not fired on an
+     * immediate (barge-in) close.
+     */
+    onFinal?: (contextId: string) => void;
+    /**
+     * Called when a context is closed (terminal). `usage` carries this
+     * conversation's audio time + amount charged (undefined if not reported).
+     * See {@link SessionUsage}.
+     */
+    onContextClosed?: (contextId: string, usage?: SessionUsage) => void;
     /** Called when a context times out */
     onContextTimeout?: (contextId: string) => void;
     /** Called when session is closed */
@@ -912,13 +1004,25 @@ declare class MultiContextSession {
     private config;
     private callbacks;
     private contexts;
+    /** Contexts a create message has been sent for (not yet necessarily
+     *  confirmed by the server via context_created). */
+    private requestedContexts;
     private _sessionId;
+    private _contextUsage;
     private isStarted;
     constructor(client: KugelAudio, config?: MultiContextConfig);
     /**
      * Get the current session ID, or null if not connected.
      */
     get sessionId(): string | null;
+    /**
+     * Per-context usage (audio time + amount charged) for a closed context, or
+     * null if that context hasn't closed yet. Each context is its own
+     * conversation — use this to bill per conversation. See {@link SessionUsage}.
+     */
+    usageFor(contextId: string): SessionUsage | null;
+    /** Map of context_id → per-context usage for all closed contexts. */
+    get contextUsage(): Map<string, SessionUsage>;
     /**
      * Connect to the multi-context WebSocket endpoint.
      *
@@ -1005,7 +1109,14 @@ declare class StreamingSession {
     private callbacks;
     private client;
     private configSent;
+    private _lastUsage;
     constructor(client: KugelAudio, config: StreamConfig, callbacks: StreamingSessionCallbacks);
+    /**
+     * Per-session usage from the most recently closed session, or null before
+     * the first session closes. Use this to bill your own customers per
+     * conversation. See {@link SessionUsage}.
+     */
+    get lastUsage(): SessionUsage | null;
     /**
      * Open the WebSocket connection and authenticate.
      *
@@ -1354,4 +1465,4 @@ declare function createWavFile(audio: ArrayBuffer, sampleRate: number): ArrayBuf
  */
 declare function createWavBlob(audio: ArrayBuffer, sampleRate: number): Blob;
-export { type AudioChunk, type AudioResponse, AuthenticationError, type BulkReplaceResult, ConnectionError, type ContextVoiceSettings, type CreateDictionaryOptions, type CreateVoiceOptions, DictionariesResource, type Dictionary, DictionaryEntriesResource, type DictionaryEntry, type DictionaryEntryInput, type DictionaryEntryListResponse, type ErrorCode, ErrorCodes, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioErrorOptions, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, NotFoundError, RateLimitError, type Region, type StreamCallbacks, type StreamConfig, type StreamingSessionCallbacks, type UpdateDictionaryEntryOptions, type UpdateDictionaryOptions, type UpdateVoiceOptions, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceDetail, type VoiceListResponse, type VoiceQuality, type VoiceReference, type VoiceSex, type WordTimestamp, WsCloseCodes, base64ToArrayBuffer, classifyHttpError, classifyWsClose, classifyWsFrame, classifyWsHandshakeError, createWavBlob, createWavFile, decodePCM16 };
+export { type AudioChunk, type AudioResponse, AuthenticationError, type BulkReplaceResult, ConnectionError, type ContextVoiceSettings, type CreateDictionaryOptions, type CreateVoiceOptions, DictionariesResource, type Dictionary, DictionaryEntriesResource, type DictionaryEntry, type DictionaryEntryInput, type DictionaryEntryListResponse, type ErrorCode, ErrorCodes, type GenerateOptions, type GenerationStats, InsufficientCreditsError, KugelAudio, KugelAudioError, type KugelAudioErrorOptions, type KugelAudioOptions, type Model, type MultiContextAudioChunk, type MultiContextCallbacks, type MultiContextConfig, NotFoundError, RateLimitError, type Region, type SessionUsage, type StreamCallbacks, type StreamConfig, type StreamingSessionCallbacks, type UpdateDictionaryEntryOptions, type UpdateDictionaryOptions, type UpdateVoiceOptions, ValidationError, type Voice, type VoiceAge, type VoiceCategory, type VoiceDetail, type VoiceListResponse, type VoiceQuality, type VoiceReference, type VoiceSex, type WordTimestamp, WsCloseCodes, base64ToArrayBuffer, classifyHttpError, classifyWsClose, classifyWsFrame, classifyWsHandshakeError, createWavBlob, createWavFile, decodePCM16, parseSessionUsage };