npm - @omote/core - Versions diffs - 0.6.6 → 0.7.1 - Mend

@omote/core 0.6.6 → 0.7.1

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (12)

package/README.md +17 -16
package/dist/{Logger-I_k4sGhM.d.mts → Logger-DSoGAYJu.d.mts} +1 -1
package/dist/{Logger-I_k4sGhM.d.ts → Logger-DSoGAYJu.d.ts} +1 -1
package/dist/index.d.mts +1744 -972
package/dist/index.d.ts +1744 -972
package/dist/index.js +5293 -2735
package/dist/index.js.map +1 -1
package/dist/index.mjs +6454 -3896
package/dist/index.mjs.map +1 -1
package/dist/logging/index.d.mts +2 -2
package/dist/logging/index.d.ts +2 -2
package/package.json +2 -1

package/dist/index.d.mts CHANGED

@@ -1,8 +1,60 @@
 import { EventEmitter, OmoteEvents } from './events/index.mjs';
 export { AnimationEvent, BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
-export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.mjs';
+export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, a as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, c as LogLevel, d as LogSink, e as LoggingConfig, g as configureLogging, h as createLogger, i as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, k as setLoggingEnabled } from './Logger-DSoGAYJu.mjs';
 export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
+/**
+ * Audio format conversion utilities
+ *
+ * Bridges the gap between TTS engines (Float32 at various sample rates)
+ * and playback pipelines (Uint8Array PCM16 at 16kHz).
+ *
+ * @module audio/audioConvert
+ */
+/**
+ * Convert Float32 [-1,1] samples to PCM16 Uint8Array (little-endian).
+ *
+ * @param samples - Float32Array of normalized audio samples
+ * @returns Uint8Array of PCM16 bytes (2 bytes per sample, little-endian)
+ */
+declare function float32ToPcm16(samples: Float32Array): Uint8Array;
+/**
+ * Linear interpolation resampler.
+ * Good enough for speech (no sinc filtering needed).
+ *
+ * @param samples - Input audio samples
+ * @param fromRate - Source sample rate (e.g., 24000)
+ * @param toRate - Target sample rate (e.g., 16000)
+ * @returns Resampled Float32Array
+ */
+declare function resampleLinear(samples: Float32Array, fromRate: number, toRate: number): Float32Array;
+/**
+ * Convenience: resample + encode in one call.
+ * Converts TTS output (Float32 at TTS rate) to pipeline format (PCM16 Uint8Array at 16kHz).
+ *
+ * @param audio - Float32Array from TTS engine
+ * @param sourceRate - TTS engine's output sample rate (default: 24000)
+ * @param targetRate - Pipeline's expected sample rate (default: 16000)
+ * @returns Uint8Array PCM16 at target rate
+ */
+declare function ttsToPlaybackFormat(audio: Float32Array, sourceRate?: number, targetRate?: number): Uint8Array;
+/**
+ * Shared audio utility functions
+ *
+ * @module audio
+ */
+/**
+ * Safely convert an ArrayBuffer of PCM16 bytes to Float32 samples.
+ * Handles odd-length buffers by truncating to the nearest even byte boundary.
+ */
+declare function pcm16ToFloat32(buffer: ArrayBuffer): Float32Array;
+/**
+ * Convert Int16Array samples to Float32Array.
+ * Each sample is divided by 32768 to normalize to [-1, 1] range.
+ */
+declare function int16ToFloat32(int16: Int16Array): Float32Array;
 /**
  * Microphone capture - renderer-agnostic audio input
  *
@@ -540,6 +592,147 @@ declare const BLENDSHAPE_TO_GROUP: Map<string, BlendshapeGroup>;
  */
 declare function applyProfile(raw: Float32Array, profile: ExpressionProfile): Float32Array;
+/**
+ * PlaybackPipeline - Audio playback + A2E lip sync with ExpressionProfile scaling
+ *
+ * Refactored superset of FullFacePipeline. Adds:
+ * - Sync mode (`feedBuffer`) for pre-recorded audio
+ * - State tracking (idle → playing → stopping)
+ * - Opt-in neutral transition animation on playback complete
+ * - Idempotent `start()` (no spurious playback:complete on restart)
+ *
+ * @category Audio
+ */
+type PlaybackState = 'idle' | 'playing' | 'stopping';
+interface PlaybackPipelineConfig {
+    /** A2E inference backend (from createA2E) */
+    lam: A2EBackend;
+    /** Sample rate in Hz (default: 16000) */
+    sampleRate?: number;
+    /** Target chunk duration for coalescing in ms (default: 200) */
+    chunkTargetMs?: number;
+    /** Audio playback delay in ms (default: auto-detected from backend) */
+    audioDelayMs?: number;
+    /** A2E inference chunk size in samples (default: 16000) */
+    chunkSize?: number;
+    /** Identity/style index for Wav2Vec2 (default: 0) */
+    identityIndex?: number;
+    /** Per-character expression weight scaling */
+    profile?: ExpressionProfile;
+    /** Enable neutral transition on playback complete (default: false) */
+    neutralTransitionEnabled?: boolean;
+    /** Duration of neutral fade-out in ms (default: 250). Only applies when neutralTransitionEnabled=true. */
+    neutralTransitionMs?: number;
+    /** Stale frame warning threshold in ms (default: 2000) */
+    staleThresholdMs?: number;
+}
+/**
+ * Full face frame with scaled blendshapes
+ */
+interface FullFaceFrame {
+    /** Scaled 52 ARKit blendshapes (ExpressionProfile applied) */
+    blendshapes: Float32Array;
+    /** Raw A2E output (52 blendshapes, before profile scaling) */
+    rawBlendshapes: Float32Array;
+    /** AudioContext timestamp for this frame */
+    timestamp: number;
+}
+interface PlaybackPipelineEvents {
+    /** New frame ready for display (scaled by ExpressionProfile) */
+    'frame': FullFaceFrame;
+    /** Raw A2E frame (before profile scaling) */
+    'frame:raw': Float32Array;
+    /** Playback started (first audio scheduled) */
+    'playback:start': {
+        time: number;
+    };
+    /** Playback completed naturally */
+    'playback:complete': void;
+    /** Playback stopped (user-initiated) */
+    'playback:stop': void;
+    /** Error occurred */
+    'error': Error;
+    /** State changed */
+    'state': PlaybackState;
+    'full_frame_ready': FullFaceFrame;
+    'lam_frame_ready': Float32Array;
+    'playback_complete': void;
+    'playback_start': number;
+    [key: string]: unknown;
+}
+declare class PlaybackPipeline extends EventEmitter<PlaybackPipelineEvents> {
+    private readonly config;
+    private scheduler;
+    private coalescer;
+    private processor;
+    private readonly sampleRate;
+    private _state;
+    private playbackStarted;
+    private monitorInterval;
+    private frameAnimationId;
+    private lastNewFrameTime;
+    private lastKnownLamFrame;
+    private staleWarningEmitted;
+    private readonly staleThresholdMs;
+    private frameLoopCount;
+    private profile;
+    private readonly neutralTransitionEnabled;
+    private readonly neutralTransitionMs;
+    private neutralTransitionFrame;
+    private neutralTransitionStart;
+    private neutralAnimationId;
+    private _currentFrame;
+    private _currentRawFrame;
+    /** Current pipeline state */
+    get state(): PlaybackState;
+    /** Current scaled blendshapes (updated in-place for perf) */
+    get currentFrame(): Float32Array | null;
+    /** Raw A2E blendshapes (before profile scaling) */
+    get currentRawFrame(): Float32Array | null;
+    constructor(config: PlaybackPipelineConfig);
+    /** Initialize AudioContext (lazy, call after user gesture) */
+    initialize(): Promise<void>;
+    /** Update ExpressionProfile at runtime */
+    setProfile(profile: ExpressionProfile): void;
+    /**
+     * Start a new playback session.
+     * Idempotent — calling during playback resets cleanly without emitting
+     * spurious playback:complete.
+     */
+    start(): void;
+    /** Feed a streaming audio chunk (PCM16 Uint8Array) */
+    onAudioChunk(chunk: Uint8Array): Promise<void>;
+    /** Signal end of audio stream (flushes remaining audio) */
+    end(): Promise<void>;
+    /**
+     * Feed a complete audio buffer. Chunks into 200ms pieces, schedules each
+     * for playback, runs A2E inference, then waits for completion.
+     */
+    feedBuffer(audio: ArrayBuffer | Float32Array): Promise<void>;
+    /** Stop playback immediately with fade-out */
+    stop(fadeOutMs?: number): Promise<void>;
+    /** Cleanup all resources */
+    dispose(): void;
+    /** Get pipeline debug state */
+    getDebugState(): {
+        state: PlaybackState;
+        playbackStarted: boolean;
+        coalescerFill: number;
+        processorFill: number;
+        queuedFrames: number;
+        currentTime: number;
+        playbackEndTime: number;
+    };
+    private startFrameLoop;
+    private startMonitoring;
+    private onPlaybackComplete;
+    private startNeutralTransition;
+    private cancelNeutralTransition;
+    private stopInternal;
+    private setState;
+}
 /**
  * FullFacePipeline - A2E expression pipeline with ExpressionProfile weight scaling
  *
@@ -624,23 +817,12 @@ interface FullFacePipelineOptions {
      */
     staleThresholdMs?: number;
 }
-/**
- * Full face frame with scaled blendshapes
- */
-interface FullFaceFrame$1 {
-    /** Scaled 52 ARKit blendshapes (ExpressionProfile applied) */
-    blendshapes: Float32Array;
-    /** Raw A2E output (52 blendshapes, before profile scaling) */
-    rawBlendshapes: Float32Array;
-    /** AudioContext timestamp for this frame */
-    timestamp: number;
-}
 /**
  * Events emitted by FullFacePipeline
  */
 interface FullFacePipelineEvents {
     /** New merged frame ready for display */
-    full_frame_ready: FullFaceFrame$1;
+    full_frame_ready: FullFaceFrame;
     /** Raw LAM frame ready (for debugging/monitoring) */
     lam_frame_ready: Float32Array;
     /** Playback has completed */
@@ -747,144 +929,131 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
 }
 /**
- * PlaybackPipeline - Audio playback + A2E lip sync with ExpressionProfile scaling
+ * TTSBackend — Streaming text-to-speech backend interface.
  *
- * Refactored superset of FullFacePipeline. Adds:
- * - Sync mode (`feedBuffer`) for pre-recorded audio
- * - State tracking (idle → playing → stopping)
- * - Opt-in neutral transition animation on playback complete
- * - Idempotent `start()` (no spurious playback:complete on restart)
+ * Any TTS engine (Kokoro, ElevenLabs, etc.) can implement this contract
+ * to integrate with TTSPlayback and VoicePipeline.
+ *
+ * @category Inference
+ */
+/**
+ * Streaming TTS backend interface.
+ *
+ * Implementations must provide:
+ * - `stream()` for sentence-by-sentence audio generation
+ * - `sampleRate` for format conversion
+ * - `load()` for model initialization
+ *
+ * @example
+ * ```typescript
+ * const kokoro: TTSBackend = new KokoroTTSInference({ defaultVoice: 'af_heart' });
+ * await kokoro.load();
+ *
+ * for await (const chunk of kokoro.stream("Hello world!", { voice: 'af_heart' })) {
+ *   // chunk.audio is Float32Array at kokoro.sampleRate
+ * }
+ * ```
+ */
+interface TTSBackend {
+    /** Stream audio chunks for given text. Each chunk: Float32Array at engine's native rate. */
+    stream(text: string, options?: TTSStreamOptions): AsyncGenerator<TTSChunk>;
+    /** Engine's native output sample rate (e.g., 24000 for Kokoro). */
+    readonly sampleRate: number;
+    /** Load model if not already loaded. */
+    load(): Promise<unknown>;
+    /** Whether model is loaded and ready. */
+    readonly isLoaded: boolean;
+    /** Release resources. */
+    dispose(): Promise<void>;
+}
+/**
+ * Options for TTSBackend.stream()
+ */
+interface TTSStreamOptions {
+    /** Abort signal for cancellation */
+    signal?: AbortSignal;
+    /** Voice override per-call */
+    voice?: string;
+    /** Speed multiplier override per-call */
+    speed?: number;
+}
+/**
+ * A single chunk of TTS audio output
+ */
+interface TTSChunk {
+    /** Audio samples at engine's native sample rate */
+    audio: Float32Array;
+    /** Duration in seconds */
+    duration: number;
+    /** Sentence/segment text that produced this audio */
+    text?: string;
+}
+/**
+ * TTSPlayback — Composes TTSBackend + PlaybackPipeline for text → lip sync.
+ *
+ * Handles format conversion (Float32 @ TTS rate → PCM16 @ 16kHz)
+ * and sentence prefetch for gapless playback.
  *
  * @category Audio
  */
-type PlaybackState = 'idle' | 'playing' | 'stopping';
-interface PlaybackPipelineConfig {
+interface TTSPlaybackConfig {
+    /** TTS backend (e.g., KokoroTTSInference) */
+    tts: TTSBackend;
     /** A2E inference backend (from createA2E) */
     lam: A2EBackend;
-    /** Sample rate in Hz (default: 16000) */
-    sampleRate?: number;
-    /** Target chunk duration for coalescing in ms (default: 200) */
-    chunkTargetMs?: number;
-    /** Audio playback delay in ms (default: auto-detected from backend) */
-    audioDelayMs?: number;
-    /** A2E inference chunk size in samples (default: 16000) */
-    chunkSize?: number;
-    /** Identity/style index for Wav2Vec2 (default: 0) */
-    identityIndex?: number;
     /** Per-character expression weight scaling */
     profile?: ExpressionProfile;
-    /** Enable neutral transition on playback complete (default: false) */
+    /** Prefetch next sentence while current plays. Default: true */
+    prefetch?: boolean;
+    /** Identity/style index for Wav2Vec2 (default: 0) */
+    identityIndex?: number;
+    /** Audio playback delay in ms */
+    audioDelayMs?: number;
+    /** Enable neutral transition on playback complete */
     neutralTransitionEnabled?: boolean;
-    /** Duration of neutral fade-out in ms (default: 250). Only applies when neutralTransitionEnabled=true. */
+    /** Duration of neutral fade-out in ms */
     neutralTransitionMs?: number;
-    /** Stale frame warning threshold in ms (default: 2000) */
-    staleThresholdMs?: number;
-}
-/**
- * Full face frame with scaled blendshapes
- */
-interface FullFaceFrame {
-    /** Scaled 52 ARKit blendshapes (ExpressionProfile applied) */
-    blendshapes: Float32Array;
-    /** Raw A2E output (52 blendshapes, before profile scaling) */
-    rawBlendshapes: Float32Array;
-    /** AudioContext timestamp for this frame */
-    timestamp: number;
 }
-interface PlaybackPipelineEvents {
-    /** New frame ready for display (scaled by ExpressionProfile) */
+interface TTSPlaybackEvents {
+    /** New frame ready for display */
     'frame': FullFaceFrame;
-    /** Raw A2E frame (before profile scaling) */
+    /** Raw A2E frame */
     'frame:raw': Float32Array;
-    /** Playback started (first audio scheduled) */
+    /** Playback started */
     'playback:start': {
         time: number;
     };
-    /** Playback completed naturally */
+    /** Playback completed */
     'playback:complete': void;
-    /** Playback stopped (user-initiated) */
-    'playback:stop': void;
-    /** Error occurred */
+    /** Error */
     'error': Error;
-    /** State changed */
-    'state': PlaybackState;
-    'full_frame_ready': FullFaceFrame;
-    'lam_frame_ready': Float32Array;
-    'playback_complete': void;
-    'playback_start': number;
     [key: string]: unknown;
 }
-declare class PlaybackPipeline extends EventEmitter<PlaybackPipelineEvents> {
+declare class TTSPlayback extends EventEmitter<TTSPlaybackEvents> {
     private readonly config;
-    private scheduler;
-    private coalescer;
-    private processor;
-    private readonly sampleRate;
-    private _state;
-    private playbackStarted;
-    private monitorInterval;
-    private frameAnimationId;
-    private lastNewFrameTime;
-    private lastKnownLamFrame;
-    private staleWarningEmitted;
-    private readonly staleThresholdMs;
-    private frameLoopCount;
-    private profile;
-    private readonly neutralTransitionEnabled;
-    private readonly neutralTransitionMs;
-    private neutralTransitionFrame;
-    private neutralTransitionStart;
-    private neutralAnimationId;
-    private _currentFrame;
-    private _currentRawFrame;
-    /** Current pipeline state */
-    get state(): PlaybackState;
-    /** Current scaled blendshapes (updated in-place for perf) */
-    get currentFrame(): Float32Array | null;
-    /** Raw A2E blendshapes (before profile scaling) */
-    get currentRawFrame(): Float32Array | null;
-    constructor(config: PlaybackPipelineConfig);
-    /** Initialize AudioContext (lazy, call after user gesture) */
+    private _pipeline;
+    private initialized;
+    constructor(config: TTSPlaybackConfig);
+    /** Access underlying PlaybackPipeline for event subscriptions. */
+    get pipeline(): PlaybackPipeline | null;
+    /** Load TTS model + initialize PlaybackPipeline. */
     initialize(): Promise<void>;
-    /** Update ExpressionProfile at runtime */
-    setProfile(profile: ExpressionProfile): void;
-    /**
-     * Start a new playback session.
-     * Idempotent — calling during playback resets cleanly without emitting
-     * spurious playback:complete.
-     */
-    start(): void;
-    /** Feed a streaming audio chunk (PCM16 Uint8Array) */
-    onAudioChunk(chunk: Uint8Array): Promise<void>;
-    /** Signal end of audio stream (flushes remaining audio) */
-    end(): Promise<void>;
     /**
-     * Feed a complete audio buffer. Chunks into 200ms pieces, schedules each
-     * for playback, runs A2E inference, then waits for completion.
+     * Synthesize text and play with lip sync.
+     * Streams sentences with prefetch for minimal gaps.
+     *
+     * @returns Resolves when playback completes
      */
-    feedBuffer(audio: ArrayBuffer | Float32Array): Promise<void>;
-    /** Stop playback immediately with fade-out */
-    stop(fadeOutMs?: number): Promise<void>;
-    /** Cleanup all resources */
-    dispose(): void;
-    /** Get pipeline debug state */
-    getDebugState(): {
-        state: PlaybackState;
-        playbackStarted: boolean;
-        coalescerFill: number;
-        processorFill: number;
-        queuedFrames: number;
-        currentTime: number;
-        playbackEndTime: number;
-    };
-    private startFrameLoop;
-    private startMonitoring;
-    private onPlaybackComplete;
-    private startNeutralTransition;
-    private cancelNeutralTransition;
-    private stopInternal;
-    private setState;
+    speak(text: string, options?: {
+        signal?: AbortSignal;
+        voice?: string;
+    }): Promise<void>;
+    /** Dispose of all resources. */
+    dispose(): Promise<void>;
+    private speakWithPrefetch;
+    private speakSequential;
 }
 /**
@@ -936,6 +1105,15 @@ declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
     private aiIsSpeaking;
     private interruptionTriggeredThisSession;
     constructor(config?: InterruptionConfig);
+    /**
+     * Process raw audio energy for interruption detection (no VAD required).
+     * Used during speaking state when the unified worker is busy with TTS.
+     * Because mic input is echo-cancelled, energy above the threshold is treated as user speech.
+     *
+     * @param rms - RMS energy of audio chunk (0-1)
+     * @param energyThreshold - Minimum energy to consider speech (default: 0.02)
+     */
+    processAudioEnergy(rms: number, energyThreshold?: number): void;
     /**
      * Process VAD result for interruption detection
      * @param vadProbability - Speech probability from VAD (0-1)
@@ -1459,6 +1637,7 @@ declare class SileroVADWorker {
     private config;
     private isLoading;
     private _isLoaded;
+    private poisoned;
     private state;
     private context;
     private readonly chunkSize;
@@ -1526,1132 +1705,1376 @@ declare class SileroVADWorker {
 }
 /**
- * Factory function for Silero VAD with automatic Worker vs main thread selection
+ * Unified Inference Worker — single Web Worker hosting all WASM models
  *
- * Provides a unified API that automatically selects the optimal implementation:
- * - Desktop browsers: Uses SileroVADWorker (off-main-thread inference)
- * - Mobile devices: Uses SileroVADInference (main thread, avoids memory overhead)
- * - Fallback: Gracefully falls back to main thread if Worker fails
+ * Solves the multi-worker ORT problem: three per-model workers each load their
+ * own ORT WASM instance (~40MB each). On iOS this exceeds the ~1-1.5GB tab
+ * limit, forcing main-thread fallback which blocks the render loop.
  *
- * @category Inference
+ * This worker hosts SenseVoice + Wav2ArkitCpu + Silero VAD in a single
+ * ORT WASM instance. Same total model memory (~643MB), but inference runs
+ * off-main-thread. Works on iOS because there's only one ORT instance.
  *
- * @example Basic usage (auto-detect)
+ * Consumer usage:
  * ```typescript
- * import { createSileroVAD } from '@omote/core';
- *
- * const vad = createSileroVAD({
- *   modelUrl: '/models/silero-vad.onnx',
- *   threshold: 0.5,
- * });
- *
- * await vad.load();
- * const result = await vad.process(audioChunk);
- * if (result.isSpeech) {
- *   console.log('Speech detected!', result.probability);
- * }
- * ```
+ * const worker = new UnifiedInferenceWorker();
+ * await worker.init();
  *
- * @example Force worker usage
- * ```typescript
- * const vad = createSileroVAD({
- *   modelUrl: '/models/silero-vad.onnx',
- *   useWorker: true, // Force Worker even on mobile
- * });
+ * const asr = createSenseVoice({ modelUrl: '...', unifiedWorker: worker });
+ * const lam = createA2E({ gpuModelUrl: '...', cpuModelUrl: '...', unifiedWorker: worker });
+ * const vad = createSileroVAD({ modelUrl: '...', unifiedWorker: worker });
  * ```
  *
- * @example Force main thread
- * ```typescript
- * const vad = createSileroVAD({
- *   modelUrl: '/models/silero-vad.onnx',
- *   useWorker: false, // Force main thread
- * });
- * ```
+ * @category Inference
  */
+/** Health state of the unified worker */
+type WorkerHealthState = 'healthy' | 'unhealthy' | 'recovering';
 /**
- * Common interface for both SileroVADInference and SileroVADWorker
+ * Unified Inference Worker — single Web Worker for all WASM models
  *
- * This interface defines the shared API that both implementations provide,
- * allowing consumers to use either interchangeably.
+ * Hosts SenseVoice, Wav2ArkitCpu, and Silero VAD in one ORT instance.
+ * Eliminates the multi-worker memory problem on iOS.
  */
-interface SileroVADBackend {
-    /** Current backend type (webgpu, wasm, or null if not loaded) */
-    readonly backend: RuntimeBackend | null;
-    /** Whether the model is loaded and ready for inference */
-    readonly isLoaded: boolean;
-    /** Audio sample rate (8000 or 16000 Hz) */
-    readonly sampleRate: number;
-    /** Speech detection threshold (0-1) */
-    readonly threshold: number;
-    /**
-     * Load the ONNX model
-     * @returns Model loading information
-     */
-    load(): Promise<VADModelInfo | VADWorkerModelInfo>;
-    /**
-     * Process a single audio chunk
-     * @param audioChunk - Float32Array of exactly chunkSize samples
-     * @returns VAD result with speech probability
-     */
-    process(audioChunk: Float32Array): Promise<VADResult>;
-    /**
-     * Reset state for new audio stream
-     */
-    reset(): void | Promise<void>;
+declare class UnifiedInferenceWorker {
+    private worker;
+    private pendingRequests;
+    private initialized;
+    private healthState;
+    private consecutiveFailures;
+    private _generation;
+    private recovering;
     /**
-     * Dispose of the model and free resources
+     * Initialize the worker (load ORT WASM from CDN)
      */
+    init(): Promise<void>;
+    loadSenseVoice(config: {
+        modelUrl: string;
+        tokensUrl: string;
+        language: number;
+        textNorm: number;
+    }): Promise<SenseVoiceModelInfo>;
+    transcribe(audio: Float32Array): Promise<SenseVoiceResult>;
+    disposeSenseVoice(): Promise<void>;
+    loadA2E(config: {
+        modelUrl: string;
+        externalDataUrl: string | null;
+    }): Promise<A2EModelInfo>;
+    inferA2E(audio: Float32Array): Promise<{
+        blendshapes: Float32Array;
+        numFrames: number;
+        numBlendshapes: number;
+        inferenceTimeMs: number;
+    }>;
+    disposeA2E(): Promise<void>;
+    loadLAM(config: {
+        modelUrl: string;
+        externalDataUrl: string | null;
+        numIdentityClasses?: number;
+    }): Promise<A2EModelInfo>;
+    inferLAM(audio: Float32Array, identityIndex?: number): Promise<{
+        blendshapes: Float32Array;
+        numFrames: number;
+        numBlendshapes: number;
+        inferenceTimeMs: number;
+    }>;
+    disposeLAM(): Promise<void>;
+    loadKokoro(config: {
+        modelUrl: string;
+    }): Promise<{
+        loadTimeMs: number;
+    }>;
+    inferKokoro(tokens: number[], style: Float32Array, speed: number): Promise<{
+        audio: Float32Array;
+        inferenceTimeMs: number;
+    }>;
+    disposeKokoro(): Promise<void>;
+    loadVAD(config: {
+        modelUrl: string;
+        sampleRate: number;
+    }): Promise<VADWorkerModelInfo>;
+    processVAD(audio: Float32Array, state: Float32Array, context: Float32Array): Promise<{
+        probability: number;
+        state: Float32Array;
+        inferenceTimeMs: number;
+    }>;
+    resetVAD(): Promise<Float32Array>;
+    disposeVAD(): Promise<void>;
     dispose(): Promise<void>;
+    /** Check if the worker is initialized and healthy */
+    get isReady(): boolean;
+    /** Current health state of the worker */
+    get health(): WorkerHealthState;
+    /** Generation counter — increments on worker recovery. Adapters compare to detect stale sessions. */
+    get workerGeneration(): number;
+    /** Check if Web Workers are supported */
+    static isSupported(): boolean;
+    private assertReady;
+    private createWorker;
+    private handleWorkerMessage;
+    private sendMessage;
     /**
-     * Get required chunk size in samples
+     * Ping the worker to check if it's alive. If ping succeeds, worker was just
+     * busy with long inference. If ping fails, worker is truly stuck — recover.
      */
-    getChunkSize(): number;
+    private runHealthCheck;
     /**
-     * Get chunk duration in milliseconds
+     * Terminate the stuck worker, create a new one, and re-initialize ORT.
+     * Model sessions are lost — adapters must reload via generation check.
      */
-    getChunkDurationMs(): number;
+    private recoverWorker;
+    private rejectAllPending;
+    private cleanup;
 }
 /**
- * Configuration for the Silero VAD factory
+ * Shared base config for all inference factory functions.
  *
- * Extends SileroVADConfig with worker-specific options.
+ * @category Inference
  */
-interface SileroVADFactoryConfig extends Omit<SileroVADConfig, 'modelUrl'> {
-    /** Path or URL to the ONNX model. Default: HuggingFace CDN */
-    modelUrl?: string;
-    /**
-     * Force worker usage (true), main thread (false), or auto-detect (undefined).
-     *
-     * Auto-detection behavior:
-     * - Desktop: Uses Worker (better responsiveness, off-main-thread)
-     * - Mobile: Uses main thread (avoids 5MB memory overhead)
-     *
-     * You can override this to:
-     * - `true`: Force Worker even on mobile (if you have memory headroom)
-     * - `false`: Force main thread even on desktop (for debugging)
-     *
-     * Default: undefined (auto-detect)
-     */
-    useWorker?: boolean;
+/** Base config shared across all inference factory functions */
+interface InferenceFactoryConfig {
     /**
-     * Fallback to main thread on worker errors.
-     *
-     * When true (default), if the Worker fails to load or encounters an error,
-     * the factory will automatically create a main thread instance instead.
-     *
-     * When false, worker errors will propagate as exceptions.
-     *
-     * Default: true
+     * Worker mode:
+     * - 'auto' (default): Use Worker if supported, else main thread
+     * - true: Force Worker (throws if unsupported)
+     * - false: Force main thread
      */
-    fallbackOnError?: boolean;
+    useWorker?: boolean | 'auto';
     /**
      * Unified inference worker instance.
-     * When provided, uses SileroVADUnifiedAdapter (shared single-ORT worker).
+     * When provided, routes inference through the shared worker,
+     * keeping all inference off the main thread.
      * Takes precedence over useWorker setting.
      */
     unifiedWorker?: UnifiedInferenceWorker;
 }
 /**
- * Check if the current environment supports VAD Web Workers
+ * Factory function for SenseVoice ASR with automatic Worker vs main thread selection
  *
- * Requirements:
- * - Worker constructor must exist
- * - Blob URL support (for inline worker script)
+ * Provides a unified API that automatically selects the optimal implementation:
+ * - Worker supported: Uses SenseVoiceWorker (off-main-thread inference)
+ * - Worker unsupported: Uses SenseVoiceInference (main thread)
  *
- * @returns true if VAD Worker is supported
- */
-declare function supportsVADWorker(): boolean;
-/**
- * Create a Silero VAD instance with automatic implementation selection
+ * @category Inference
  *
- * This factory function automatically selects between:
- * - **SileroVADWorker**: Off-main-thread inference (better for desktop)
- * - **SileroVADInference**: Main thread inference (better for mobile)
+ * @example Auto-detect (recommended)
+ * ```typescript
+ * import { createSenseVoice } from '@omote/core';
  *
- * The selection is based on:
- * 1. Explicit `useWorker` config (if provided)
- * 2. Platform detection (mobile vs desktop)
- * 3. Worker API availability
+ * const asr = createSenseVoice({
+ *   modelUrl: '/models/sensevoice/model.int8.onnx',
+ * });
+ * await asr.load();
+ * const { text, emotion } = await asr.transcribe(audioSamples);
+ * ```
  *
- * Both implementations share the same interface (SileroVADBackend),
- * so consumers can use either interchangeably.
+ * @example Force worker
+ * ```typescript
+ * const asr = createSenseVoice({
+ *   modelUrl: '/models/sensevoice/model.int8.onnx',
+ *   useWorker: true,
+ * });
+ * ```
+ *
+ * @example Force main thread
+ * ```typescript
+ * const asr = createSenseVoice({
+ *   modelUrl: '/models/sensevoice/model.int8.onnx',
+ *   useWorker: false,
+ * });
+ * ```
+ */
+/**
+ * Common interface for both SenseVoiceInference and SenseVoiceWorker
+ */
+interface SenseVoiceBackend {
+    /** Whether the model is loaded and ready for inference */
+    readonly isLoaded: boolean;
+    /** Current backend type ('wasm' for worker; 'wasm' or 'webgpu' on the main thread; null if not loaded) */
+    readonly backend: 'wasm' | 'webgpu' | null;
+    /**
+     * Load the ONNX model
+     * @param onProgress - Optional progress callback (fires once at 100% for worker)
+     * @returns Model loading information
+     */
+    load(onProgress?: (loaded: number, total: number) => void): Promise<SenseVoiceModelInfo>;
+    /**
+     * Transcribe audio samples to text
+     * @param audioSamples - Float32Array of audio samples at 16kHz
+     * @returns Transcription result
+     */
+    transcribe(audioSamples: Float32Array): Promise<SenseVoiceResult>;
+    /**
+     * Dispose of the model and free resources
+     */
+    dispose(): Promise<void>;
+}
+/**
+ * Configuration for the SenseVoice factory
+ *
+ * Every field declared here is optional, and `createSenseVoice()` also accepts
+ * no config at all. Additional options are inherited from
+ * `InferenceFactoryConfig` (declared elsewhere in this file); the factory
+ * examples pass `useWorker`, which presumably comes from that base
+ * interface - TODO confirm.
+ */
+interface CreateSenseVoiceConfig extends InferenceFactoryConfig {
+    /** Path or URL to model.int8.onnx (239MB). Default: HuggingFace CDN */
+    modelUrl?: string;
+    /**
+     * Path or URL to tokens.txt vocabulary file (default: sibling of modelUrl).
+     * NOTE(review): "sibling" presumably means the same directory as modelUrl - confirm.
+     */
+    tokensUrl?: string;
+    /** Language hint (default: 'auto'). Allowed values are given by `SenseVoiceLanguage`. */
+    language?: SenseVoiceLanguage;
+    /**
+     * Text normalization (default: 'with_itn').
+     * NOTE(review): "itn" presumably stands for inverse text normalization - confirm.
+     */
+    textNorm?: 'with_itn' | 'without_itn';
+}
+/**
+ * Create a SenseVoice ASR instance with automatic implementation selection
  *
  * @param config - Factory configuration
- * @returns A SileroVAD instance (either Worker or main thread)
+ * @returns A SenseVoiceBackend instance (either Worker or main thread)
+ */
+declare function createSenseVoice(config?: CreateSenseVoiceConfig): SenseVoiceBackend;
+/**
+ * Shared blendshape constants and utilities for lip sync inference
  *
- * @example
- * ```typescript
- * // Auto-detect (recommended)
- * const vad = createSileroVAD({ modelUrl: '/models/silero-vad.onnx' });
+ * Contains LAM_BLENDSHAPES (canonical ordering), symmetrization, and
+ * index remapping used by both Wav2Vec2Inference and Wav2ArkitCpuInference.
  *
- * // Force Worker
- * const vadWorker = createSileroVAD({ modelUrl: '/models/silero-vad.onnx', useWorker: true });
+ * This module is the single source of truth for blendshape ordering to
+ * avoid circular dependencies between inference classes.
  *
- * // Force main thread
- * const vadMain = createSileroVAD({ modelUrl: '/models/silero-vad.onnx', useWorker: false });
- * ```
+ * @category Inference
  */
-declare function createSileroVAD(config?: SileroVADFactoryConfig): SileroVADBackend;
 /**
- * Web Worker-based wav2arkit_cpu lip sync inference
+ * LAM model blendshape names in order (52 total)
+ * NOTE: This is alphabetical ordering used by LAM, different from standard ARKit order
+ */
+declare const LAM_BLENDSHAPES: readonly ["browDownLeft", "browDownRight", "browInnerUp", "browOuterUpLeft", "browOuterUpRight", "cheekPuff", "cheekSquintLeft", "cheekSquintRight", "eyeBlinkLeft", "eyeBlinkRight", "eyeLookDownLeft", "eyeLookDownRight", "eyeLookInLeft", "eyeLookInRight", "eyeLookOutLeft", "eyeLookOutRight", "eyeLookUpLeft", "eyeLookUpRight", "eyeSquintLeft", "eyeSquintRight", "eyeWideLeft", "eyeWideRight", "jawForward", "jawLeft", "jawOpen", "jawRight", "mouthClose", "mouthDimpleLeft", "mouthDimpleRight", "mouthFrownLeft", "mouthFrownRight", "mouthFunnel", "mouthLeft", "mouthLowerDownLeft", "mouthLowerDownRight", "mouthPressLeft", "mouthPressRight", "mouthPucker", "mouthRight", "mouthRollLower", "mouthRollUpper", "mouthShrugLower", "mouthShrugUpper", "mouthSmileLeft", "mouthSmileRight", "mouthStretchLeft", "mouthStretchRight", "mouthUpperUpLeft", "mouthUpperUpRight", "noseSneerLeft", "noseSneerRight", "tongueOut"];
+/**
+ * Alias for backwards compatibility.
+ *
+ * Declared with the same 52-name readonly tuple type as LAM_BLENDSHAPES,
+ * i.e. the alphabetical ordering used by LAM rather than the standard
+ * ARKit order, despite the name.
+ */
+declare const ARKIT_BLENDSHAPES: readonly ["browDownLeft", "browDownRight", "browInnerUp", "browOuterUpLeft", "browOuterUpRight", "cheekPuff", "cheekSquintLeft", "cheekSquintRight", "eyeBlinkLeft", "eyeBlinkRight", "eyeLookDownLeft", "eyeLookDownRight", "eyeLookInLeft", "eyeLookInRight", "eyeLookOutLeft", "eyeLookOutRight", "eyeLookUpLeft", "eyeLookUpRight", "eyeSquintLeft", "eyeSquintRight", "eyeWideLeft", "eyeWideRight", "jawForward", "jawLeft", "jawOpen", "jawRight", "mouthClose", "mouthDimpleLeft", "mouthDimpleRight", "mouthFrownLeft", "mouthFrownRight", "mouthFunnel", "mouthLeft", "mouthLowerDownLeft", "mouthLowerDownRight", "mouthPressLeft", "mouthPressRight", "mouthPucker", "mouthRight", "mouthRollLower", "mouthRollUpper", "mouthShrugLower", "mouthShrugUpper", "mouthSmileLeft", "mouthSmileRight", "mouthStretchLeft", "mouthStretchRight", "mouthUpperUpLeft", "mouthUpperUpRight", "noseSneerLeft", "noseSneerRight", "tongueOut"];
+/**
+ * Linearly interpolate between two blendshape weight arrays.
  *
- * Runs wav2arkit_cpu inference in a dedicated Web Worker to prevent main thread blocking.
- * Uses inline worker script (Blob URL pattern) to avoid separate file deployment.
+ * Pure math utility with zero renderer dependency — used by all renderer
+ * adapters (@omote/three, @omote/babylon, @omote/r3f) for smooth frame
+ * transitions.
  *
- * Key design decisions:
- * - WASM backend only (WebGPU doesn't work in Workers)
- * - Audio copied (not transferred) to retain main thread access
- * - ONNX Runtime loaded from CDN in worker (no bundler complications)
- * - Blendshape symmetrization inlined in worker (no module imports)
- * - iOS: passes model URLs as strings directly to ORT (avoids 400MB+ JS heap)
+ * @param current - Current blendshape weights
+ * @param target  - Target blendshape weights
+ * @param factor  - Interpolation factor (0 = no change, 1 = snap to target). Default: 0.3
+ * @returns Interpolated weights as number[]
+ */
+declare function lerpBlendshapes(current: Float32Array | number[], target: Float32Array | number[], factor?: number): number[];
+/**
+ * Wav2Vec2 inference engine for Audio-to-Expression (A2E)
+ *
+ * Runs entirely in the browser using WebGPU or WASM.
+ * Takes raw 16kHz audio and outputs 52 ARKit blendshapes for lip sync.
  *
  * @category Inference
  *
- * @example
+ * @example Basic usage
  * ```typescript
- * import { Wav2ArkitCpuWorker } from '@omote/core';
+ * import { Wav2Vec2Inference } from '@omote/core';
  *
- * const lam = new Wav2ArkitCpuWorker({
- *   modelUrl: '/models/wav2arkit_cpu.onnx',
- * });
- * await lam.load();
+ * const wav2vec = new Wav2Vec2Inference({ modelUrl: '/models/model.onnx' });
+ * await wav2vec.load();
  *
- * const { blendshapes } = await lam.infer(audioSamples);
- * // blendshapes: Float32Array[] in LAM_BLENDSHAPES order, 30fps
+ * // Process 1 second of audio (16kHz = 16000 samples)
+ * const result = await wav2vec.infer(audioSamples);
+ * console.log('Blendshapes:', result.blendshapes); // [30, 52] for 30fps
  * ```
  */
-/**
- * Configuration for Wav2ArkitCpu Worker
- */
-interface Wav2ArkitCpuWorkerConfig {
-    /** Path or URL to the wav2arkit_cpu ONNX model (.onnx graph file) */
+type InferenceBackend = BackendPreference;
+interface Wav2Vec2InferenceConfig {
+    /** Path or URL to the ONNX model */
     modelUrl: string;
     /**
      * Path or URL to external model data file (.onnx.data weights).
-     * Default: `${modelUrl}.data` (e.g., /models/wav2arkit_cpu.onnx.data)
+     * Default: `${modelUrl}.data` (e.g., /models/model.onnx.data)
      *
      * Set to `false` to skip external data loading (single-file models only).
      */
     externalDataUrl?: string | false;
+    /** Preferred backend (auto will try WebGPU first, fallback to WASM) */
+    backend?: InferenceBackend;
+    /** Number of identity classes (default: 12 for streaming model) */
+    numIdentityClasses?: number;
+    /**
+     * Number of audio samples per inference chunk (default: 16000).
+     * Model supports variable chunk sizes. Smaller chunks = lower latency,
+     * more inference overhead. 8000 (500ms) is recommended for real-time lip sync.
+     */
+    chunkSize?: number;
+}
+interface ModelInfo {
+    backend: 'webgpu' | 'wasm';
+    loadTimeMs: number;
+    inputNames: string[];
+    outputNames: string[];
 }
 /**
- * Wav2ArkitCpu Worker - Lip sync inference in a Web Worker
- *
- * Runs wav2arkit_cpu inference off the main thread to prevent UI blocking.
- * Feature parity with Wav2ArkitCpuInference but runs in dedicated worker.
- *
- * @see Wav2ArkitCpuInference for main-thread version
+ * CTC vocabulary (32 tokens from wav2vec2-base-960h)
+ * @deprecated ASR is handled by SenseVoice. This will be removed in a future release.
  */
-declare class Wav2ArkitCpuWorker implements A2EBackend {
-    readonly modelId: "wav2arkit_cpu";
-    readonly chunkSize: number;
-    private worker;
+declare const CTC_VOCAB: string[];
+interface Wav2Vec2Result {
+    /**
+     * Blendshape weights [frames, 52] - 30fps.
+     * One Float32Array per output frame. This interface is the resolved
+     * value of `Wav2Vec2Inference.infer()`.
+     * NOTE(review): each frame presumably follows LAM_BLENDSHAPES ordering
+     * (`getBlendshape` looks values up by those names) - confirm.
+     */
+    blendshapes: Float32Array[];
+    /**
+     * Number of blendshape frames (30fps).
+     * NOTE(review): presumably equal to `blendshapes.length` - confirm.
+     */
+    numFrames: number;
+    /** Inference time in ms (milliseconds) */
+    inferenceTimeMs: number;
+}
+declare class Wav2Vec2Inference implements A2EBackend {
+    readonly modelId: "wav2vec2";
+    private session;
+    private ort;
     private config;
+    private _backend;
     private isLoading;
-    private _isLoaded;
+    private numIdentityClasses;
+    readonly chunkSize: number;
     private inferenceQueue;
     private poisoned;
-    private pendingResolvers;
-    constructor(config: Wav2ArkitCpuWorkerConfig);
-    get isLoaded(): boolean;
-    /**
-     * Backend type (always 'wasm' for Worker, WebGPU not supported in Workers)
-     */
-    get backend(): 'wasm' | null;
-    /**
-     * Create the worker from inline script
-     */
-    private createWorker;
-    /**
-     * Handle messages from worker
-     */
-    private handleWorkerMessage;
+    private static readonly INFERENCE_TIMEOUT_MS;
+    constructor(config: Wav2Vec2InferenceConfig);
     /**
-     * Send message to worker and wait for response
+     * Check if WebGPU is available and working
+     * (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
      */
-    private sendMessage;
+    static isWebGPUAvailable: typeof isWebGPUAvailable;
+    get backend(): 'webgpu' | 'wasm' | null;
+    get isLoaded(): boolean;
+    /** True if inference timed out and the session is permanently unusable */
+    get isSessionPoisoned(): boolean;
     /**
-     * Load the ONNX model in the worker
+     * Load the ONNX model
      */
-    load(): Promise<A2EModelInfo>;
+    load(): Promise<ModelInfo>;
     /**
      * Run inference on raw audio
-     *
-     * Accepts variable-length audio (not fixed to 16000 samples).
-     * Output frames = ceil(30 * numSamples / 16000).
-     *
      * @param audioSamples - Float32Array of raw audio at 16kHz
-     * @param _identityIndex - Ignored (identity 11 is baked into the model)
+     * @param identityIndex - Optional identity index (0-11, default 0 = neutral)
+     *
+     * Audio will be zero-padded or truncated to chunkSize samples.
      */
-    infer(audioSamples: Float32Array, _identityIndex?: number): Promise<A2EResult>;
+    infer(audioSamples: Float32Array, identityIndex?: number): Promise<Wav2Vec2Result>;
     /**
-     * Queue inference to serialize worker calls
+     * Queue inference to serialize ONNX session calls
      */
     private queueInference;
     /**
-     * Dispose of the worker and free resources
+     * Get blendshape value by name for a specific frame
      */
-    dispose(): Promise<void>;
+    getBlendshape(blendshapes: Float32Array, name: typeof LAM_BLENDSHAPES[number]): number;
     /**
-     * Check if Web Workers are supported
+     * Dispose of the model and free resources
      */
-    static isSupported(): boolean;
+    dispose(): Promise<void>;
 }
 /**
- * Unified Inference Worker — single Web Worker hosting all WASM models
+ * Default and user-configurable model URLs for all ONNX models
  *
- * Solves the multi-worker ORT problem: three per-model workers each load their
- * own ORT WASM instance (~40MB each). On iOS this exceeds the ~1-1.5GB tab
- * limit, forcing main-thread fallback which blocks the render loop.
+ * Out of the box, models are served from HuggingFace CDN (`/resolve/main/`
+ * endpoint with `Access-Control-Allow-Origin: *`). For production apps that
+ * need faster or more reliable delivery, call {@link configureModelUrls} once
+ * at startup to point any or all models at your own CDN.
  *
- * This worker hosts SenseVoice + Wav2ArkitCpu + Silero VAD in a single
- * ORT WASM instance. Same total model memory (~643MB), but inference runs
- * off-main-thread. Works on iOS because there's only one ORT instance.
+ * @category Inference
  *
- * Consumer usage:
+ * @example Use HuggingFace defaults (zero-config)
  * ```typescript
- * const worker = new UnifiedInferenceWorker();
- * await worker.init();
- *
- * const asr = createSenseVoice({ modelUrl: '...', unifiedWorker: worker });
- * const lam = createA2E({ gpuModelUrl: '...', cpuModelUrl: '...', unifiedWorker: worker });
- * const vad = createSileroVAD({ modelUrl: '...', unifiedWorker: worker });
+ * import { createA2E } from '@omote/core';
+ * const a2e = createA2E(); // fetches from HuggingFace CDN
  * ```
  *
- * @category Inference
+ * @example Self-host on your own CDN
+ * ```typescript
+ * import { configureModelUrls, createA2E } from '@omote/core';
+ *
+ * configureModelUrls({
+ *   lam: 'https://cdn.example.com/models/model_fp16.onnx',
+ *   senseVoice: 'https://cdn.example.com/models/sensevoice.int8.onnx',
+ *   // omitted keys keep HuggingFace defaults
+ * });
+ *
+ * const a2e = createA2E(); // now fetches from your CDN
+ * ```
  */
+/**
+ * Model URL keys that can be configured.
+ *
+ * Used as the key type of the `DEFAULT_MODEL_URLS` and `HF_CDN_URLS`
+ * records, and of the partial override map accepted by
+ * `configureModelUrls`.
+ */
+type ModelUrlKey = 'lam' | 'lamIos' | 'wav2arkitCpu' | 'senseVoice' | 'sileroVad' | 'kokoroTTS' | 'kokoroVoices';
 /**
- * Unified Inference Worker — single Web Worker for all WASM models
+ * Resolved model URLs — user overrides take priority, HuggingFace CDN is fallback.
  *
- * Hosts SenseVoice, Wav2ArkitCpu, and Silero VAD in one ORT instance.
- * Eliminates the multi-worker memory problem on iOS.
+ * All SDK factories (`createA2E`, `createSenseVoice`, `createSileroVAD`) and
+ * orchestrators (`VoicePipeline`) read from this object. Call
+ * {@link configureModelUrls} before constructing any pipelines to point
+ * models at your own CDN.
  */
-declare class UnifiedInferenceWorker {
-    private worker;
-    private pendingRequests;
-    private initialized;
-    private poisoned;
-    /**
-     * Initialize the worker (load ORT WASM from CDN)
-     */
-    init(): Promise<void>;
-    loadSenseVoice(config: {
-        modelUrl: string;
-        tokensUrl: string;
-        language: number;
-        textNorm: number;
-    }): Promise<SenseVoiceModelInfo>;
-    transcribe(audio: Float32Array): Promise<SenseVoiceResult>;
-    disposeSenseVoice(): Promise<void>;
-    loadA2E(config: {
-        modelUrl: string;
-        externalDataUrl: string | null;
-    }): Promise<A2EModelInfo>;
-    inferA2E(audio: Float32Array): Promise<{
-        blendshapes: Float32Array;
-        numFrames: number;
-        numBlendshapes: number;
-        inferenceTimeMs: number;
-    }>;
-    disposeA2E(): Promise<void>;
-    loadVAD(config: {
-        modelUrl: string;
-        sampleRate: number;
-    }): Promise<VADWorkerModelInfo>;
-    processVAD(audio: Float32Array, state: Float32Array, context: Float32Array): Promise<{
-        probability: number;
-        state: Float32Array;
-        inferenceTimeMs: number;
-    }>;
-    resetVAD(): Promise<Float32Array>;
-    disposeVAD(): Promise<void>;
-    dispose(): Promise<void>;
-    /** Check if the worker is initialized and not poisoned */
-    get isReady(): boolean;
-    /** Check if Web Workers are supported */
-    static isSupported(): boolean;
-    private assertReady;
-    private createWorker;
-    private handleWorkerMessage;
-    private sendMessage;
-    private rejectAllPending;
-    private cleanup;
-}
+declare const DEFAULT_MODEL_URLS: Readonly<Record<ModelUrlKey, string>>;
 /**
- * SenseVoice adapter backed by UnifiedInferenceWorker
+ * Configure custom model URLs. Overrides persist for the lifetime of the page.
+ * Omitted keys keep their HuggingFace CDN defaults.
  *
- * Implements SenseVoiceBackend, delegating all inference to the shared worker.
+ * Call this **once** at app startup, before constructing any pipelines.
+ *
+ * @example Self-host all models
+ * ```typescript
+ * configureModelUrls({
+ *   lam: 'https://cdn.example.com/models/model_fp16.onnx',
+ *   wav2arkitCpu: 'https://cdn.example.com/models/wav2arkit_cpu.onnx',
+ *   senseVoice: 'https://cdn.example.com/models/sensevoice.int8.onnx',
+ *   sileroVad: 'https://cdn.example.com/models/silero-vad.onnx',
+ * });
+ * ```
+ *
+ * @example Override only one model
+ * ```typescript
+ * configureModelUrls({
+ *   lam: '/models/model_fp16.onnx', // self-hosted, same origin
+ * });
+ * ```
  */
-declare class SenseVoiceUnifiedAdapter implements SenseVoiceBackend {
-    private worker;
-    private config;
-    private _isLoaded;
-    private languageId;
-    private textNormId;
-    private inferenceQueue;
-    constructor(worker: UnifiedInferenceWorker, config: SenseVoiceWorkerConfig);
-    get isLoaded(): boolean;
-    get backend(): 'wasm' | null;
-    load(onProgress?: (loaded: number, total: number) => void): Promise<SenseVoiceModelInfo>;
-    transcribe(audioSamples: Float32Array): Promise<SenseVoiceResult>;
-    dispose(): Promise<void>;
-}
+declare function configureModelUrls(urls: Partial<Record<ModelUrlKey, string>>): void;
 /**
- * Wav2ArkitCpu adapter backed by UnifiedInferenceWorker
- *
- * Implements A2EBackend, delegating all inference to the shared worker.
+ * Reset all model URL overrides back to HuggingFace CDN defaults.
+ * Mainly useful for testing.
  */
-declare class Wav2ArkitCpuUnifiedAdapter implements A2EBackend {
-    readonly modelId: "wav2arkit_cpu";
-    readonly chunkSize: number;
-    private worker;
-    private config;
-    private _isLoaded;
-    private inferenceQueue;
-    constructor(worker: UnifiedInferenceWorker, config: Wav2ArkitCpuWorkerConfig);
-    get isLoaded(): boolean;
-    get backend(): RuntimeBackend | null;
-    load(): Promise<A2EModelInfo>;
-    infer(audioSamples: Float32Array, _identityIndex?: number): Promise<A2EResult>;
-    dispose(): Promise<void>;
-}
+declare function resetModelUrls(): void;
 /**
- * Silero VAD adapter backed by UnifiedInferenceWorker
- *
- * Implements SileroVADBackend, delegating all inference to the shared worker.
+ * Get the immutable HuggingFace CDN URLs (ignoring any overrides).
+ * Useful for documentation or fallback logic.
  */
-declare class SileroVADUnifiedAdapter implements SileroVADBackend {
-    private worker;
-    private config;
-    private _isLoaded;
-    private state;
-    private context;
-    private readonly chunkSize;
-    private readonly contextSize;
-    private inferenceQueue;
-    private preSpeechBuffer;
-    private wasSpeaking;
-    constructor(worker: UnifiedInferenceWorker, config: SileroVADConfig);
-    get isLoaded(): boolean;
-    get backend(): RuntimeBackend | null;
-    get sampleRate(): number;
-    get threshold(): number;
-    getChunkSize(): number;
-    getChunkDurationMs(): number;
-    load(): Promise<VADWorkerModelInfo>;
-    process(audioChunk: Float32Array): Promise<VADResult>;
-    reset(): Promise<void>;
-    dispose(): Promise<void>;
-}
+declare const HF_CDN_URLS: Readonly<Record<ModelUrlKey, string>>;
 /**
- * Factory function for SenseVoice ASR with automatic Worker vs main thread selection
+ * CPU-optimized lip sync inference using wav2arkit_cpu model
  *
- * Provides a unified API that automatically selects the optimal implementation:
- * - Worker supported: Uses SenseVoiceWorker (off-main-thread inference)
- * - Worker unsupported: Uses SenseVoiceInference (main thread)
+ * A Safari/iOS-compatible alternative to Wav2Vec2Inference (192MB fp16) designed
+ * for platforms where WebGPU crashes due to ONNX Runtime JSEP bugs.
  *
- * @category Inference
+ * The model uses ONNX external data format:
+ * - wav2arkit_cpu.onnx (1.86MB graph structure)
+ * - wav2arkit_cpu.onnx.data (402MB weights)
+ * Both files are fetched and cached automatically.
  *
- * @example Auto-detect (recommended)
- * ```typescript
- * import { createSenseVoice } from '@omote/core';
+ * Key differences from Wav2Vec2Inference:
+ * - WASM-only backend (CPU-optimized, single-threaded, no WebGPU)
+ * - No identity input (baked to identity 11)
+ * - No ASR output (lip sync only)
+ * - Dynamic input length (not fixed to 16000 samples)
+ * - Different native blendshape ordering (remapped to LAM_BLENDSHAPES)
  *
- * const asr = createSenseVoice({
- *   modelUrl: '/models/sensevoice/model.int8.onnx',
- * });
- * await asr.load();
- * const { text, emotion } = await asr.transcribe(audioSamples);
- * ```
+ * @category Inference
  *
- * @example Force worker
+ * @example
  * ```typescript
- * const asr = createSenseVoice({
- *   modelUrl: '/models/sensevoice/model.int8.onnx',
- *   useWorker: true,
- * });
- * ```
+ * import { Wav2ArkitCpuInference } from '@omote/core';
  *
- * @example Force main thread
- * ```typescript
- * const asr = createSenseVoice({
- *   modelUrl: '/models/sensevoice/model.int8.onnx',
- *   useWorker: false,
+ * const lam = new Wav2ArkitCpuInference({
+ *   modelUrl: '/models/wav2arkit_cpu.onnx',
  * });
+ * await lam.load();
+ *
+ * const { blendshapes } = await lam.infer(audioSamples);
+ * // blendshapes: Float32Array[] in LAM_BLENDSHAPES order, 30fps
  * ```
  */
-/**
- * Common interface for both SenseVoiceInference and SenseVoiceWorker
- */
-interface SenseVoiceBackend {
-    /** Whether the model is loaded and ready for inference */
-    readonly isLoaded: boolean;
-    /** Current backend type ('wasm' for worker, full backend for main thread, null if not loaded) */
-    readonly backend: 'wasm' | 'webgpu' | null;
+interface Wav2ArkitCpuConfig {
+    /** Path or URL to the wav2arkit_cpu ONNX model (.onnx graph file) */
+    modelUrl: string;
     /**
-     * Load the ONNX model
-     * @param onProgress - Optional progress callback (fires once at 100% for worker)
-     * @returns Model loading information
+     * Path or URL to external model data file (.onnx.data weights).
+     * Default: `${modelUrl}.data` (e.g., /models/wav2arkit_cpu.onnx.data)
+     *
+     * Set to `false` to skip external data loading (single-file models only).
      */
-    load(onProgress?: (loaded: number, total: number) => void): Promise<SenseVoiceModelInfo>;
+    externalDataUrl?: string | false;
+    /** Preferred backend (default: 'wasm' — this model is CPU-optimized) */
+    backend?: BackendPreference;
+}
+declare class Wav2ArkitCpuInference implements A2EBackend {
+    readonly modelId: "wav2arkit_cpu";
+    readonly chunkSize: number;
+    private session;
+    private ort;
+    private config;
+    private _backend;
+    private isLoading;
+    private inferenceQueue;
+    private poisoned;
+    private static readonly INFERENCE_TIMEOUT_MS;
+    constructor(config: Wav2ArkitCpuConfig);
+    get backend(): RuntimeBackend | null;
+    get isLoaded(): boolean;
     /**
-     * Transcribe audio samples to text
-     * @param audioSamples - Float32Array of audio samples at 16kHz
-     * @returns Transcription result
+     * Load the ONNX model
      */
-    transcribe(audioSamples: Float32Array): Promise<SenseVoiceResult>;
+    load(): Promise<A2EModelInfo>;
     /**
-     * Dispose of the model and free resources
+     * Run inference on raw audio
+     *
+     * Accepts variable-length audio (not fixed to 16000 samples).
+     * Output frames = ceil(30 * numSamples / 16000).
+     *
+     * @param audioSamples - Float32Array of raw audio at 16kHz
+     * @param _identityIndex - Ignored (identity 11 is baked into the model)
      */
-    dispose(): Promise<void>;
-}
-/**
- * Configuration for the SenseVoice factory
- */
-interface CreateSenseVoiceConfig {
-    /** Path or URL to model.int8.onnx (239MB). Default: HuggingFace CDN */
-    modelUrl?: string;
-    /** Path or URL to tokens.txt vocabulary file (default: sibling of modelUrl) */
-    tokensUrl?: string;
-    /** Language hint (default: 'auto') */
-    language?: SenseVoiceLanguage;
-    /** Text normalization (default: 'with_itn') */
-    textNorm?: 'with_itn' | 'without_itn';
+    infer(audioSamples: Float32Array, _identityIndex?: number): Promise<A2EResult>;
     /**
-     * Worker mode:
-     * - 'auto' (default): Use Worker if supported, else main thread
-     * - true: Force Worker (throws if unsupported)
-     * - false: Force main thread
+     * Queue inference to serialize ONNX session calls
      */
-    useWorker?: boolean | 'auto';
+    private queueInference;
     /**
-     * Unified inference worker instance.
-     * When provided, uses SenseVoiceUnifiedAdapter (shared single-ORT worker).
-     * Takes precedence over useWorker setting.
+     * Dispose of the model and free resources
      */
-    unifiedWorker?: UnifiedInferenceWorker;
+    dispose(): Promise<void>;
 }
-/**
- * Create a SenseVoice ASR instance with automatic implementation selection
- *
- * @param config - Factory configuration
- * @returns A SenseVoiceBackend instance (either Worker or main thread)
- */
-declare function createSenseVoice(config?: CreateSenseVoiceConfig): SenseVoiceBackend;
 /**
- * Shared blendshape constants and utilities for lip sync inference
- *
- * Contains LAM_BLENDSHAPES (canonical ordering), symmetrization, and
- * index remapping used by both Wav2Vec2Inference and Wav2ArkitCpuInference.
- *
- * This module is the single source of truth for blendshape ordering to
- * avoid circular dependencies between inference classes.
- *
- * @category Inference
- */
-/**
- * LAM model blendshape names in order (52 total)
- * NOTE: This is alphabetical ordering used by LAM, different from standard ARKit order
- */
-declare const LAM_BLENDSHAPES: readonly ["browDownLeft", "browDownRight", "browInnerUp", "browOuterUpLeft", "browOuterUpRight", "cheekPuff", "cheekSquintLeft", "cheekSquintRight", "eyeBlinkLeft", "eyeBlinkRight", "eyeLookDownLeft", "eyeLookDownRight", "eyeLookInLeft", "eyeLookInRight", "eyeLookOutLeft", "eyeLookOutRight", "eyeLookUpLeft", "eyeLookUpRight", "eyeSquintLeft", "eyeSquintRight", "eyeWideLeft", "eyeWideRight", "jawForward", "jawLeft", "jawOpen", "jawRight", "mouthClose", "mouthDimpleLeft", "mouthDimpleRight", "mouthFrownLeft", "mouthFrownRight", "mouthFunnel", "mouthLeft", "mouthLowerDownLeft", "mouthLowerDownRight", "mouthPressLeft", "mouthPressRight", "mouthPucker", "mouthRight", "mouthRollLower", "mouthRollUpper", "mouthShrugLower", "mouthShrugUpper", "mouthSmileLeft", "mouthSmileRight", "mouthStretchLeft", "mouthStretchRight", "mouthUpperUpLeft", "mouthUpperUpRight", "noseSneerLeft", "noseSneerRight", "tongueOut"];
-/** Alias for backwards compatibility */
-declare const ARKIT_BLENDSHAPES: readonly ["browDownLeft", "browDownRight", "browInnerUp", "browOuterUpLeft", "browOuterUpRight", "cheekPuff", "cheekSquintLeft", "cheekSquintRight", "eyeBlinkLeft", "eyeBlinkRight", "eyeLookDownLeft", "eyeLookDownRight", "eyeLookInLeft", "eyeLookInRight", "eyeLookOutLeft", "eyeLookOutRight", "eyeLookUpLeft", "eyeLookUpRight", "eyeSquintLeft", "eyeSquintRight", "eyeWideLeft", "eyeWideRight", "jawForward", "jawLeft", "jawOpen", "jawRight", "mouthClose", "mouthDimpleLeft", "mouthDimpleRight", "mouthFrownLeft", "mouthFrownRight", "mouthFunnel", "mouthLeft", "mouthLowerDownLeft", "mouthLowerDownRight", "mouthPressLeft", "mouthPressRight", "mouthPucker", "mouthRight", "mouthRollLower", "mouthRollUpper", "mouthShrugLower", "mouthShrugUpper", "mouthSmileLeft", "mouthSmileRight", "mouthStretchLeft", "mouthStretchRight", "mouthUpperUpLeft", "mouthUpperUpRight", "noseSneerLeft", "noseSneerRight", "tongueOut"];
-/**
- * Linearly interpolate between two blendshape weight arrays.
- *
- * Pure math utility with zero renderer dependency — used by all renderer
- * adapters (@omote/three, @omote/babylon, @omote/r3f) for smooth frame
- * transitions.
+ * Web Worker-based wav2arkit_cpu lip sync inference
  *
- * @param current - Current blendshape weights
- * @param target  - Target blendshape weights
- * @param factor  - Interpolation factor (0 = no change, 1 = snap to target). Default: 0.3
- * @returns Interpolated weights as number[]
- */
-declare function lerpBlendshapes(current: Float32Array | number[], target: Float32Array | number[], factor?: number): number[];
-/**
- * Wav2Vec2 inference engine for Audio-to-Expression (A2E)
+ * Runs wav2arkit_cpu inference in a dedicated Web Worker to prevent main thread blocking.
+ * Uses inline worker script (Blob URL pattern) to avoid separate file deployment.
  *
- * Runs entirely in the browser using WebGPU or WASM.
- * Takes raw 16kHz audio and outputs 52 ARKit blendshapes for lip sync.
+ * Key design decisions:
+ * - WASM backend only (WebGPU doesn't work in Workers)
+ * - Audio copied (not transferred) to retain main thread access
+ * - ONNX Runtime loaded from CDN in worker (no bundler complications)
+ * - Blendshape symmetrization inlined in worker (no module imports)
+ * - iOS: passes model URLs as strings directly to ORT (avoids 400MB+ JS heap)
  *
  * @category Inference
  *
- * @example Basic usage
+ * @example
  * ```typescript
- * import { Wav2Vec2Inference } from '@omote/core';
+ * import { Wav2ArkitCpuWorker } from '@omote/core';
  *
- * const wav2vec = new Wav2Vec2Inference({ modelUrl: '/models/model.onnx' });
- * await wav2vec.load();
+ * const lam = new Wav2ArkitCpuWorker({
+ *   modelUrl: '/models/wav2arkit_cpu.onnx',
+ * });
+ * await lam.load();
  *
- * // Process 1 second of audio (16kHz = 16000 samples)
- * const result = await wav2vec.infer(audioSamples);
- * console.log('Blendshapes:', result.blendshapes); // [30, 52] for 30fps
+ * const { blendshapes } = await lam.infer(audioSamples);
+ * // blendshapes: Float32Array[] in LAM_BLENDSHAPES order, 30fps
  * ```
  */
-type InferenceBackend = BackendPreference;
-interface Wav2Vec2InferenceConfig {
-    /** Path or URL to the ONNX model */
+/**
+ * Configuration for Wav2ArkitCpu Worker
+ */
+interface Wav2ArkitCpuWorkerConfig {
+    /** Path or URL to the wav2arkit_cpu ONNX model (.onnx graph file) */
     modelUrl: string;
-    /**
-     * Path or URL to external model data file (.onnx.data weights).
-     * Default: `${modelUrl}.data` (e.g., /models/model.onnx.data)
-     *
-     * Set to `false` to skip external data loading (single-file models only).
-     */
-    externalDataUrl?: string | false;
-    /** Preferred backend (auto will try WebGPU first, fallback to WASM) */
-    backend?: InferenceBackend;
-    /** Number of identity classes (default: 12 for streaming model) */
-    numIdentityClasses?: number;
-    /**
-     * Number of audio samples per inference chunk (default: 16000).
-     * Model supports variable chunk sizes. Smaller chunks = lower latency,
-     * more inference overhead. 8000 (500ms) is recommended for real-time lip sync.
-     */
-    chunkSize?: number;
-}
-interface ModelInfo {
-    backend: 'webgpu' | 'wasm';
-    loadTimeMs: number;
-    inputNames: string[];
-    outputNames: string[];
+    /**
+     * Path or URL to external model data file (.onnx.data weights).
+     * Default: `${modelUrl}.data` (e.g., /models/wav2arkit_cpu.onnx.data)
+     *
+     * Set to `false` to skip external data loading (single-file models only).
+     */
+    externalDataUrl?: string | false;
 }
 /**
- * CTC vocabulary (32 tokens from wav2vec2-base-960h)
- * @deprecated ASR is handled by SenseVoice. This will be removed in a future release.
+ * Wav2ArkitCpu Worker - Lip sync inference in a Web Worker
+ *
+ * Runs wav2arkit_cpu inference off the main thread to prevent UI blocking.
+ * Feature parity with Wav2ArkitCpuInference but runs in dedicated worker.
+ *
+ * @see Wav2ArkitCpuInference for main-thread version
  */
-declare const CTC_VOCAB: string[];
-interface Wav2Vec2Result {
-    /** Blendshape weights [frames, 52] - 30fps */
-    blendshapes: Float32Array[];
-    /** Number of blendshape frames (30fps) */
-    numFrames: number;
-    /** Inference time in ms */
-    inferenceTimeMs: number;
-}
-declare class Wav2Vec2Inference implements A2EBackend {
-    readonly modelId: "wav2vec2";
-    private session;
-    private ort;
+declare class Wav2ArkitCpuWorker implements A2EBackend {
+    readonly modelId: "wav2arkit_cpu";
+    readonly chunkSize: number;
+    private worker;
     private config;
-    private _backend;
     private isLoading;
-    private numIdentityClasses;
-    readonly chunkSize: number;
+    private _isLoaded;
     private inferenceQueue;
     private poisoned;
-    private static readonly INFERENCE_TIMEOUT_MS;
-    constructor(config: Wav2Vec2InferenceConfig);
+    private pendingResolvers;
+    constructor(config: Wav2ArkitCpuWorkerConfig);
+    get isLoaded(): boolean;
     /**
-     * Check if WebGPU is available and working
-     * (iOS returns false even if navigator.gpu exists due to ONNX Runtime bugs)
+     * Backend type: 'wasm' or null (never 'webgpu' — WebGPU is not supported in Workers)
      */
-    static isWebGPUAvailable: typeof isWebGPUAvailable;
-    get backend(): 'webgpu' | 'wasm' | null;
-    get isLoaded(): boolean;
-    /** True if inference timed out and the session is permanently unusable */
-    get isSessionPoisoned(): boolean;
+    get backend(): 'wasm' | null;
     /**
-     * Load the ONNX model
+     * Create the worker from inline script
      */
-    load(): Promise<ModelInfo>;
+    private createWorker;
+    /**
+     * Handle messages from worker
+     */
+    private handleWorkerMessage;
+    /**
+     * Send message to worker and wait for response
+     */
+    private sendMessage;
+    /**
+     * Load the ONNX model in the worker
+     */
+    load(): Promise<A2EModelInfo>;
     /**
      * Run inference on raw audio
-     * @param audioSamples - Float32Array of raw audio at 16kHz
-     * @param identityIndex - Optional identity index (0-11, default 0 = neutral)
      *
-     * Audio will be zero-padded or truncated to chunkSize samples.
+     * Accepts variable-length audio (not fixed to 16000 samples).
+     * Output frames = ceil(30 * numSamples / 16000).
+     *
+     * @param audioSamples - Float32Array of raw audio at 16kHz
+     * @param _identityIndex - Ignored (identity 11 is baked into the model)
      */
-    infer(audioSamples: Float32Array, identityIndex?: number): Promise<Wav2Vec2Result>;
+    infer(audioSamples: Float32Array, _identityIndex?: number): Promise<A2EResult>;
     /**
-     * Queue inference to serialize ONNX session calls
+     * Queue inference to serialize worker calls
      */
     private queueInference;
     /**
-     * Get blendshape value by name for a specific frame
+     * Dispose of the worker and free resources
      */
-    getBlendshape(blendshapes: Float32Array, name: typeof LAM_BLENDSHAPES[number]): number;
+    dispose(): Promise<void>;
     /**
-     * Dispose of the model and free resources
+     * Check if Web Workers are supported
      */
-    dispose(): Promise<void>;
+    static isSupported(): boolean;
 }
 /**
- * Default and user-configurable model URLs for all ONNX models
+ * Factory function for A2E with automatic GPU/CPU model selection
  *
- * Out of the box, models are served from HuggingFace CDN (`/resolve/main/`
- * endpoint with `Access-Control-Allow-Origin: *`). For production apps that
- * need faster or more reliable delivery, call {@link configureModelUrls} once
- * at startup to point any or all models at your own CDN.
+ * Provides a unified API with platform-aware model selection:
+ *
+ * **Desktop (Chrome/Edge/Android):**
+ *   Wav2Vec2 (WebGPU, 192MB fp16) → wav2arkit_cpu fallback
+ *
+ * **iOS/Safari:**
+ *   LAM iOS (WASM, opset 18, ~192MB fp16, native LayerNorm) → wav2arkit_cpu fallback
+ *
+ * The iOS variant is the same LAM model re-exported at opset 18 with native
+ * LayerNormalization ops (~256 fewer graph nodes than desktop's opset 14
+ * decomposed LayerNorm). This dramatically reduces peak memory during ORT
+ * graph parsing/optimization, fitting within iOS's ~1-1.5GB tab limit.
+ *
+ * Both variants use fp16 external data format (small graph + ~192MB weights).
+ * On iOS, ORT streams weights directly into WASM memory via URL pass-through
+ * (~2MB JS heap). If the model still OOMs, A2EWithFallback falls back to
+ * wav2arkit_cpu (404MB fp32, lower quality).
  *
  * @category Inference
  *
- * @example Use HuggingFace defaults (zero-config)
+ * @example Auto-detect (recommended, zero-config)
  * ```typescript
  * import { createA2E } from '@omote/core';
- * const a2e = createA2E(); // fetches from HuggingFace CDN
+ *
+ * const a2e = createA2E(); // uses HF CDN defaults (192MB fp16 GPU, 404MB CPU fallback)
+ * await a2e.load();
+ * const { blendshapes } = await a2e.infer(audioSamples);
  * ```
  *
- * @example Self-host on your own CDN
+ * @example Force CPU model
  * ```typescript
- * import { configureModelUrls, createA2E } from '@omote/core';
- *
- * configureModelUrls({
- *   lam: 'https://cdn.example.com/models/model_fp16.onnx',
- *   senseVoice: 'https://cdn.example.com/models/sensevoice.int8.onnx',
- *   // omitted keys keep HuggingFace defaults
- * });
- *
- * const a2e = createA2E(); // now fetches from your CDN
+ * const a2e = createA2E({ mode: 'cpu' });
  * ```
  */
-/** Model URL keys that can be configured */
-type ModelUrlKey = 'lam' | 'wav2arkitCpu' | 'senseVoice' | 'sileroVad';
 /**
- * Resolved model URLs — user overrides take priority, HuggingFace CDN is fallback.
+ * Configuration for the A2E factory
+ */
+interface CreateA2EConfig extends InferenceFactoryConfig {
+    /** URL for the GPU model (Wav2Vec2, used on Chrome/Firefox/Edge). Default: HuggingFace CDN */
+    gpuModelUrl?: string;
+    /**
+     * URL for GPU model external data file (.onnx.data weights).
+     * Default: `${gpuModelUrl}.data`
+     *
+     * Set to `false` to skip external data loading (single-file models only).
+     */
+    gpuExternalDataUrl?: string | false;
+    /** URL for the CPU model (wav2arkit_cpu, used on Safari/iOS). Default: HuggingFace CDN */
+    cpuModelUrl?: string;
+    /**
+     * Model selection mode:
+     * - 'auto': Platform-aware selection as described in the factory docs above (default)
+     * - 'gpu': Force GPU model (Wav2Vec2Inference)
+     * - 'cpu': Force CPU model (Wav2ArkitCpuInference)
+     */
+    mode?: 'auto' | 'gpu' | 'cpu';
+    /** Backend preference for GPU model (default: 'auto') */
+    gpuBackend?: BackendPreference;
+    /** Number of identity classes for GPU model (default: 12) */
+    numIdentityClasses?: number;
+    /**
+     * Fall back to CPU model if GPU model fails to load (default: true)
+     * Only applies when mode is 'auto' or 'gpu'
+     */
+    fallbackOnError?: boolean;
+}
+/**
+ * Create an A2E instance with automatic GPU/CPU model selection
  *
- * All SDK factories (`createA2E`, `createSenseVoice`, `createSileroVAD`) and
- * orchestrators (`VoicePipeline`) read from this object. Call
- * {@link configureModelUrls} before constructing any pipelines to point
- * models at your own CDN.
+ * @param config - Factory configuration
+ * @returns An A2EBackend instance (either GPU or CPU model)
  */
-declare const DEFAULT_MODEL_URLS: Readonly<Record<ModelUrlKey, string>>;
+declare function createA2E(config?: CreateA2EConfig): A2EBackend;
 /**
- * Configure custom model URLs. Overrides persist for the lifetime of the page.
- * Omitted keys keep their HuggingFace CDN defaults.
+ * A2EProcessor — Engine-agnostic audio-to-expression processor
  *
- * Call this **once** at app startup, before constructing any pipelines.
+ * The core inference primitive: audio samples in → blendshape frames out.
+ * No mic capture, no audio playback, no Web Audio API.
  *
- * @example Self-host all models
+ * This is what Unity/Unreal/Godot/any engine would use directly.
+ * Web-specific concerns (mic, AudioContext, scheduling) live in the
+ * orchestrator and pipeline layers above.
+ *
+ * Two output modes:
+ * - **Pull mode**: `pushAudio(samples, timestamp)` + `getFrameForTime(t)`
+ *   For TTS playback where frames are synced to AudioContext clock.
+ * - **Push mode**: `pushAudio(samples)` + `startDrip()` + `latestFrame`
+ *   For live mic / game loop where frames are consumed at ~30fps.
+ *
+ * @category Inference
+ *
+ * @example Pull mode (TTS playback)
  * ```typescript
- * configureModelUrls({
- *   lam: 'https://cdn.example.com/models/model_fp16.onnx',
- *   wav2arkitCpu: 'https://cdn.example.com/models/wav2arkit_cpu.onnx',
- *   senseVoice: 'https://cdn.example.com/models/sensevoice.int8.onnx',
- *   sileroVad: 'https://cdn.example.com/models/silero-vad.onnx',
- * });
+ * const processor = new A2EProcessor({ backend: a2e });
+ * processor.pushAudio(samples, audioContext.currentTime + delay);
+ * const frame = processor.getFrameForTime(audioContext.currentTime);
  * ```
  *
- * @example Override only one model
+ * @example Push mode (live mic)
  * ```typescript
- * configureModelUrls({
- *   lam: '/models/model_fp16.onnx', // self-hosted, same origin
+ * const processor = new A2EProcessor({
+ *   backend: a2e,
+ *   onFrame: (frame) => applyToAvatar(frame),
  * });
+ * processor.startDrip();
+ * processor.pushAudio(micSamples); // no timestamp → drip mode
  * ```
  */
-declare function configureModelUrls(urls: Partial<Record<ModelUrlKey, string>>): void;
-/**
- * Reset all model URL overrides back to HuggingFace CDN defaults.
- * Mainly useful for testing.
- */
-declare function resetModelUrls(): void;
-/**
- * Get the immutable HuggingFace CDN URLs (ignoring any overrides).
- * Useful for documentation or fallback logic.
- */
-declare const HF_CDN_URLS: Readonly<Record<ModelUrlKey, string>>;
+interface A2EProcessorConfig {
+    /** Inference backend */
+    backend: A2EBackend;
+    /** Sample rate (default: 16000) */
+    sampleRate?: number;
+    /** Samples per inference chunk (default: 16000 = 1s) */
+    chunkSize?: number;
+    /**
+     * Identity/style index for the A2E model (default: 0).
+     *
+     * The LAM model uses a one-hot identity vector (12 classes, indices 0-11) as
+     * style conditioning alongside audio features. Different indices produce
+     * different expression intensity across face regions (brows, eyes, cheeks).
+     *
+     * Only affects Wav2Vec2Inference (GPU model). Wav2ArkitCpuInference has
+     * identity 11 baked into the model weights.
+     */
+    identityIndex?: number;
+    /** Callback fired with each blendshape frame (push mode) */
+    onFrame?: (frame: Float32Array) => void;
+    /** Error callback */
+    onError?: (error: Error) => void;
+}
+declare class A2EProcessor {
+    private readonly backend;
+    private readonly sampleRate;
+    private readonly chunkSize;
+    private readonly identityIndex;
+    private readonly onFrame?;
+    private readonly onError?;
+    private bufferCapacity;
+    private buffer;
+    private writeOffset;
+    private bufferStartTime;
+    private timestampedQueue;
+    private plainQueue;
+    private _latestFrame;
+    private dripInterval;
+    private lastPulledFrame;
+    private inferenceRunning;
+    private pendingChunks;
+    private getFrameCallCount;
+    private disposed;
+    constructor(config: A2EProcessorConfig);
+    /**
+     * Push audio samples for inference (any source: mic, TTS, file).
+     *
+     * - With `timestamp`: frames stored with timestamps (pull mode)
+     * - Without `timestamp`: frames stored in plain queue (drip/push mode)
+     *
+     * Fire-and-forget: returns immediately, inference runs async.
+     */
+    pushAudio(samples: Float32Array, timestamp?: number): void;
+    /**
+     * Flush remaining buffered audio (pads to chunkSize).
+     * Call at end of stream to process final partial chunk.
+     *
+     * Routes through the serialized pendingChunks pipeline to maintain
+     * correct frame ordering. Without this, flush() could push frames
+     * with the latest timestamp to the queue before drainPendingChunks()
+     * finishes pushing frames with earlier timestamps — causing
+     * getFrameForTime() to see out-of-order timestamps and stall.
+     */
+    flush(): Promise<void>;
+    /**
+     * Reset buffer and frame queues
+     */
+    reset(): void;
+    /**
+     * Get frame synced to external clock (e.g. AudioContext.currentTime).
+     *
+     * Discards frames that are too old, returns the current frame,
+     * or holds last frame as fallback to prevent avatar freezing.
+     *
+     * @param currentTime - Current playback time (seconds)
+     * @returns Blendshape frame, or null if no frames yet
+     */
+    getFrameForTime(currentTime: number): Float32Array | null;
+    /** Latest frame from drip-feed (live mic, game loop) */
+    get latestFrame(): Float32Array | null;
+    /** Start 30fps drip-feed timer (push mode) */
+    startDrip(): void;
+    /** Stop drip-feed timer */
+    stopDrip(): void;
+    /** Number of frames waiting in queue (both modes combined) */
+    get queuedFrameCount(): number;
+    /** Buffer fill level as fraction of chunkSize (0-1) */
+    get fillLevel(): number;
+    /** Dispose resources */
+    dispose(): void;
+    /**
+     * Process pending chunks sequentially.
+     * Fire-and-forget — called from pushAudio() without awaiting.
+     */
+    private drainPendingChunks;
+    private handleError;
+}
 /**
- * CPU-optimized lip sync inference using wav2arkit_cpu model
+ * BlendshapeSmoother — Per-channel critically damped spring for 52 ARKit blendshapes
  *
- * A Safari/iOS-compatible alternative to Wav2Vec2Inference (192MB fp16) designed
- * for platforms where WebGPU crashes due to ONNX Runtime JSEP bugs.
+ * Eliminates frame gaps between inference batches by smoothly interpolating
+ * blendshape weights using critically damped springs (the game industry standard).
  *
- * The model uses ONNX external data format:
- * - wav2arkit_cpu.onnx (1.86MB graph structure)
- * - wav2arkit_cpu.onnx.data (402MB weights)
- * Both files are fetched and cached automatically.
+ * Each of the 52 blendshape channels has its own spring with position + velocity
+ * state. When a new inference frame arrives, spring targets are updated. Between
+ * frames, springs continue converging toward the last target — no frozen face.
  *
- * Key differences from Wav2Vec2Inference:
- * - WASM-only backend (CPU-optimized, single-threaded, no WebGPU)
- * - No identity input (baked to identity 11)
- * - No ASR output (lip sync only)
- * - Dynamic input length (not fixed to 16000 samples)
- * - Different native blendshape ordering (remapped to LAM_BLENDSHAPES)
+ * When inference stalls, `decayToNeutral()` sets all targets to 0, and the
+ * springs smoothly close the mouth / relax the face at a rate set by the halflife.
+ *
+ * Math from Daniel Holden's "Spring-It-On" (Epic Games):
+ * https://theorangeduck.com/page/spring-roll-call
  *
  * @category Inference
  *
- * @example
+ * @example Basic usage
  * ```typescript
- * import { Wav2ArkitCpuInference } from '@omote/core';
- *
- * const lam = new Wav2ArkitCpuInference({
- *   modelUrl: '/models/wav2arkit_cpu.onnx',
- * });
- * await lam.load();
+ * const smoother = new BlendshapeSmoother({ halflife: 0.06 });
  *
- * const { blendshapes } = await lam.infer(audioSamples);
- * // blendshapes: Float32Array[] in LAM_BLENDSHAPES order, 30fps
+ * // In frame loop (60fps):
+ * smoother.setTarget(inferenceFrame);        // when new frame arrives
+ * const smoothed = smoother.update(1/60);    // every render frame
+ * applyToAvatar(smoothed);
  * ```
  */
-interface Wav2ArkitCpuConfig {
-    /** Path or URL to the wav2arkit_cpu ONNX model (.onnx graph file) */
-    modelUrl: string;
+interface BlendshapeSmootherConfig {
     /**
-     * Path or URL to external model data file (.onnx.data weights).
-     * Default: `${modelUrl}.data` (e.g., /models/wav2arkit_cpu.onnx.data)
+     * Spring halflife in seconds — time for the distance to the target
+     * to reduce by half. Lower = snappier, higher = smoother.
      *
-     * Set to `false` to skip external data loading (single-file models only).
+     * - 0.04s (40ms): Very snappy, slight jitter on fast transitions
+     * - 0.06s (60ms): Sweet spot for lip sync (default)
+     * - 0.10s (100ms): Very smooth, slight lag on fast consonants
+     * - 0: Bypass mode — passes through raw target values (no smoothing)
+     *
+     * Default: 0.06
      */
-    externalDataUrl?: string | false;
-    /** Preferred backend (default: 'wasm' — this model is CPU-optimized) */
-    backend?: BackendPreference;
+    halflife?: number;
 }
-declare class Wav2ArkitCpuInference implements A2EBackend {
-    readonly modelId: "wav2arkit_cpu";
-    readonly chunkSize: number;
-    private session;
-    private ort;
-    private config;
-    private _backend;
-    private isLoading;
-    private inferenceQueue;
-    private poisoned;
-    private static readonly INFERENCE_TIMEOUT_MS;
-    constructor(config: Wav2ArkitCpuConfig);
-    get backend(): RuntimeBackend | null;
-    get isLoaded(): boolean;
+declare class BlendshapeSmoother {
+    private readonly halflife;
+    /** Current smoothed blendshape values */
+    private values;
+    /** Per-channel spring velocities */
+    private velocities;
+    /** Current spring targets (from latest inference frame) */
+    private targets;
+    /** Whether any target has been set */
+    private _hasTarget;
+    constructor(config?: BlendshapeSmootherConfig);
+    /** Whether a target frame has been set (false until first setTarget call) */
+    get hasTarget(): boolean;
     /**
-     * Load the ONNX model
+     * Set new target frame from inference output.
+     * Springs will converge toward these values on subsequent update() calls.
      */
-    load(): Promise<A2EModelInfo>;
+    setTarget(frame: Float32Array): void;
     /**
-     * Run inference on raw audio
+     * Advance all 52 springs by `dt` seconds and return the smoothed frame.
      *
-     * Accepts variable-length audio (not fixed to 16000 samples).
-     * Output frames = ceil(30 * numSamples / 16000).
+     * Call this every render frame (e.g., inside requestAnimationFrame).
+     * Returns the internal values buffer — do NOT mutate the returned array.
      *
-     * @param audioSamples - Float32Array of raw audio at 16kHz
-     * @param _identityIndex - Ignored (identity 11 is baked into the model)
+     * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
+     * @returns Smoothed blendshape values (Float32Array of 52)
      */
-    infer(audioSamples: Float32Array, _identityIndex?: number): Promise<A2EResult>;
+    update(dt: number): Float32Array;
     /**
-     * Queue inference to serialize ONNX session calls
+     * Decay all spring targets to neutral (0).
+     *
+     * Call when inference stalls (no new frames for threshold duration).
+     * The springs will smoothly close the mouth / relax the face at a
+     * rate set by the halflife rather than freezing.
      */
-    private queueInference;
+    decayToNeutral(): void;
     /**
-     * Dispose of the model and free resources
+     * Reset all state (values, velocities, targets).
+     * Call when starting a new playback session.
      */
-    dispose(): Promise<void>;
+    reset(): void;
 }
 /**
- * Factory function for A2E with automatic GPU/CPU model selection
- *
- * Provides a unified API that always tries Wav2Vec2 (LAM fp16) first:
- * - All platforms: Tries Wav2Vec2Inference (192MB fp16, external data format)
- * - Fallback: Gracefully falls back to wav2arkit_cpu if GPU model fails to load
+ * Factory function for Silero VAD with automatic Worker vs main thread selection
  *
- * The fp16 external data format (385KB graph + 192MB weights) enables iOS support:
- * - URL pass-through: ORT streams weights directly into WASM memory (~2MB JS heap)
- * - Basic graph optimization: avoids ~750-950MB peak from 'all' optimization
- * - If iOS OOMs during session creation, A2EWithFallback catches it and loads
- *   wav2arkit_cpu (1.86MB graph + 402MB weights) as a safe fallback.
+ * Provides a unified API that automatically selects the optimal implementation:
+ * - Desktop browsers: Uses SileroVADWorker (off-main-thread inference)
+ * - Mobile devices: Uses SileroVADInference (main thread, avoids memory overhead)
+ * - Fallback: Gracefully falls back to main thread if Worker fails
  *
  * @category Inference
  *
- * @example Auto-detect (recommended, zero-config)
+ * @example Basic usage (auto-detect)
  * ```typescript
- * import { createA2E } from '@omote/core';
+ * import { createSileroVAD } from '@omote/core';
  *
- * const a2e = createA2E(); // uses HF CDN defaults (192MB fp16 GPU, 404MB CPU fallback)
- * await a2e.load();
- * const { blendshapes } = await a2e.infer(audioSamples);
+ * const vad = createSileroVAD({
+ *   modelUrl: '/models/silero-vad.onnx',
+ *   threshold: 0.5,
+ * });
+ *
+ * await vad.load();
+ * const result = await vad.process(audioChunk);
+ * if (result.isSpeech) {
+ *   console.log('Speech detected!', result.probability);
+ * }
  * ```
  *
- * @example Force CPU model
+ * @example Force worker usage
  * ```typescript
- * const a2e = createA2E({ mode: 'cpu' });
+ * const vad = createSileroVAD({
+ *   modelUrl: '/models/silero-vad.onnx',
+ *   useWorker: true, // Force Worker even on mobile
+ * });
+ * ```
+ *
+ * @example Force main thread
+ * ```typescript
+ * const vad = createSileroVAD({
+ *   modelUrl: '/models/silero-vad.onnx',
+ *   useWorker: false, // Force main thread
+ * });
  * ```
  */
 /**
- * Configuration for the A2E factory
+ * Common interface for both SileroVADInference and SileroVADWorker
+ *
+ * This interface defines the shared API that both implementations provide,
+ * allowing consumers to use either interchangeably.
  */
-interface CreateA2EConfig {
-    /** URL for the GPU model (Wav2Vec2, used on Chrome/Firefox/Edge). Default: HuggingFace CDN */
-    gpuModelUrl?: string;
+interface SileroVADBackend {
+    /** Current backend type (webgpu, wasm, or null if not loaded) */
+    readonly backend: RuntimeBackend | null;
+    /** Whether the model is loaded and ready for inference */
+    readonly isLoaded: boolean;
+    /** Audio sample rate (8000 or 16000 Hz) */
+    readonly sampleRate: number;
+    /** Speech detection threshold (0-1) */
+    readonly threshold: number;
     /**
-     * URL for GPU model external data file (.onnx.data weights).
-     * Default: `${gpuModelUrl}.data`
-     *
-     * Set to `false` to skip external data loading (single-file models only).
+     * Load the ONNX model
+     * @returns Model loading information
      */
-    gpuExternalDataUrl?: string | false;
-    /** URL for the CPU model (wav2arkit_cpu, used on Safari/iOS). Default: HuggingFace CDN */
-    cpuModelUrl?: string;
+    load(): Promise<VADModelInfo | VADWorkerModelInfo>;
     /**
-     * Model selection mode:
-     * - 'auto': Safari/iOS -> CPU, everything else -> GPU (default)
-     * - 'gpu': Force GPU model (Wav2Vec2Inference)
-     * - 'cpu': Force CPU model (Wav2ArkitCpuInference)
+     * Process a single audio chunk
+     * @param audioChunk - Float32Array of exactly chunkSize samples
+     * @returns VAD result with speech probability
      */
-    mode?: 'auto' | 'gpu' | 'cpu';
-    /** Backend preference for GPU model (default: 'auto') */
-    gpuBackend?: BackendPreference;
-    /** Number of identity classes for GPU model (default: 12) */
-    numIdentityClasses?: number;
+    process(audioChunk: Float32Array): Promise<VADResult>;
+    /**
+     * Reset state for new audio stream
+     */
+    reset(): void | Promise<void>;
+    /**
+     * Dispose of the model and free resources
+     */
+    dispose(): Promise<void>;
+    /**
+     * Get required chunk size in samples
+     */
+    getChunkSize(): number;
     /**
-     * Fall back to CPU model if GPU model fails to load (default: true)
-     * Only applies when mode is 'auto' or 'gpu'
+     * Get chunk duration in milliseconds
      */
-    fallbackOnError?: boolean;
+    getChunkDurationMs(): number;
+}
+/**
+ * Configuration for the Silero VAD factory
+ *
+ * Extends SileroVADConfig (making `modelUrl` optional) and InferenceFactoryConfig, adding worker-fallback control.
+ */
+interface SileroVADFactoryConfig extends Omit<SileroVADConfig, 'modelUrl'>, InferenceFactoryConfig {
+    /** Path or URL to the ONNX model. Default: HuggingFace CDN */
+    modelUrl?: string;
     /**
-     * Use Web Worker for CPU model inference (default: false)
+     * Fallback to main thread on worker errors.
+     *
+     * When true (default), if the Worker fails to load or encounters an error,
+     * the factory will automatically create a main thread instance instead.
      *
-     * When true, Wav2ArkitCpuWorker is used instead of Wav2ArkitCpuInference,
-     * running inference off the main thread to prevent UI blocking during
-     * model loading and inference.
+     * When false, worker errors will propagate as exceptions.
      *
-     * Only applies when the CPU model is selected (mode: 'cpu', auto on Safari/iOS,
-     * or fallback from GPU).
-     */
-    useWorker?: boolean;
-    /**
-     * Unified inference worker instance.
-     * When provided and CPU model is selected, uses Wav2ArkitCpuUnifiedAdapter.
-     * Takes precedence over useWorker setting for the CPU model path.
-     * GPU model (Wav2Vec2) always stays on main thread (WebGPU).
+     * Default: true
      */
-    unifiedWorker?: UnifiedInferenceWorker;
+    fallbackOnError?: boolean;
 }
 /**
- * Create an A2E instance with automatic GPU/CPU model selection
+ * Check if the current environment supports VAD Web Workers
  *
- * @param config - Factory configuration
- * @returns An A2EBackend instance (either GPU or CPU model)
+ * Requirements:
+ * - Worker constructor must exist
+ * - Blob URL support (for inline worker script)
+ *
+ * @returns true if VAD Worker is supported
  */
-declare function createA2E(config?: CreateA2EConfig): A2EBackend;
+declare function supportsVADWorker(): boolean;
 /**
- * A2EProcessor — Engine-agnostic audio-to-expression processor
+ * Create a Silero VAD instance with automatic implementation selection
  *
- * The core inference primitive: audio samples in → blendshape frames out.
- * No mic capture, no audio playback, no Web Audio API.
+ * This factory function automatically selects between:
+ * - **SileroVADWorker**: Off-main-thread inference (better for desktop)
+ * - **SileroVADInference**: Main thread inference (better for mobile)
  *
- * This is what Unity/Unreal/Godot/any engine would use directly.
- * Web-specific concerns (mic, AudioContext, scheduling) live in the
- * orchestrator and pipeline layers above.
+ * The selection is based on:
+ * 1. Explicit `useWorker` config (if provided)
+ * 2. Platform detection (mobile vs desktop)
+ * 3. Worker API availability
  *
- * Two output modes:
- * - **Pull mode**: `pushAudio(samples, timestamp)` + `getFrameForTime(t)`
- *   For TTS playback where frames are synced to AudioContext clock.
- * - **Push mode**: `pushAudio(samples)` + `startDrip()` + `latestFrame`
- *   For live mic / game loop where frames are consumed at ~30fps.
+ * Both implementations share the same interface (SileroVADBackend),
+ * so consumers can use either interchangeably.
  *
- * @category Inference
+ * @param config - Factory configuration
+ * @returns A SileroVAD instance (either Worker or main thread)
  *
- * @example Pull mode (TTS playback)
+ * @example
  * ```typescript
- * const processor = new A2EProcessor({ backend: a2e });
- * processor.pushAudio(samples, audioContext.currentTime + delay);
- * const frame = processor.getFrameForTime(audioContext.currentTime);
- * ```
+ * // Auto-detect (recommended)
+ * const vad = createSileroVAD({ modelUrl: '/models/silero-vad.onnx' });
  *
- * @example Push mode (live mic)
- * ```typescript
- * const processor = new A2EProcessor({
- *   backend: a2e,
- *   onFrame: (frame) => applyToAvatar(frame),
- * });
- * processor.startDrip();
- * processor.pushAudio(micSamples); // no timestamp → drip mode
+ * // Force Worker
+ * const vadWorker = createSileroVAD({ modelUrl: '/models/silero-vad.onnx', useWorker: true });
+ *
+ * // Force main thread
+ * const vadMain = createSileroVAD({ modelUrl: '/models/silero-vad.onnx', useWorker: false });
  * ```
  */
+declare function createSileroVAD(config?: SileroVADFactoryConfig): SileroVADBackend;
-interface A2EProcessorConfig {
-    /** Inference backend */
-    backend: A2EBackend;
-    /** Sample rate (default: 16000) */
-    sampleRate?: number;
-    /** Samples per inference chunk (default: 16000 = 1s) */
-    chunkSize?: number;
-    /**
-     * Identity/style index for the A2E model (default: 0).
-     *
-     * The LAM model uses a one-hot identity vector (12 classes, indices 0-11) as
-     * style conditioning alongside audio features. Different indices produce
-     * different expression intensity across face regions (brows, eyes, cheeks).
-     *
-     * Only affects Wav2Vec2Inference (GPU model). Wav2ArkitCpuInference has
-     * identity 11 baked into the model weights.
-     */
-    identityIndex?: number;
-    /** Callback fired with each blendshape frame (push mode) */
-    onFrame?: (frame: Float32Array) => void;
-    /** Error callback */
-    onError?: (error: Error) => void;
-}
-declare class A2EProcessor {
-    private readonly backend;
-    private readonly sampleRate;
-    private readonly chunkSize;
-    private readonly identityIndex;
-    private readonly onFrame?;
-    private readonly onError?;
-    private bufferCapacity;
-    private buffer;
-    private writeOffset;
-    private bufferStartTime;
-    private timestampedQueue;
-    private plainQueue;
-    private _latestFrame;
-    private dripInterval;
-    private lastPulledFrame;
-    private inferenceRunning;
-    private pendingChunks;
-    private getFrameCallCount;
-    private disposed;
-    constructor(config: A2EProcessorConfig);
-    /**
-     * Push audio samples for inference (any source: mic, TTS, file).
-     *
-     * - With `timestamp`: frames stored with timestamps (pull mode)
-     * - Without `timestamp`: frames stored in plain queue (drip/push mode)
-     *
-     * Fire-and-forget: returns immediately, inference runs async.
-     */
-    pushAudio(samples: Float32Array, timestamp?: number): void;
-    /**
-     * Flush remaining buffered audio (pads to chunkSize).
-     * Call at end of stream to process final partial chunk.
-     *
-     * Routes through the serialized pendingChunks pipeline to maintain
-     * correct frame ordering. Without this, flush() could push frames
-     * with the latest timestamp to the queue before drainPendingChunks()
-     * finishes pushing frames with earlier timestamps — causing
-     * getFrameForTime() to see out-of-order timestamps and stall.
-     */
-    flush(): Promise<void>;
-    /**
-     * Reset buffer and frame queues
-     */
-    reset(): void;
-    /**
-     * Get frame synced to external clock (e.g. AudioContext.currentTime).
-     *
-     * Discards frames that are too old, returns the current frame,
-     * or holds last frame as fallback to prevent avatar freezing.
-     *
-     * @param currentTime - Current playback time (seconds)
-     * @returns Blendshape frame, or null if no frames yet
-     */
-    getFrameForTime(currentTime: number): Float32Array | null;
-    /** Latest frame from drip-feed (live mic, game loop) */
-    get latestFrame(): Float32Array | null;
-    /** Start 30fps drip-feed timer (push mode) */
-    startDrip(): void;
-    /** Stop drip-feed timer */
-    stopDrip(): void;
-    /** Number of frames waiting in queue (both modes combined) */
-    get queuedFrameCount(): number;
-    /** Buffer fill level as fraction of chunkSize (0-1) */
-    get fillLevel(): number;
-    /** Dispose resources */
-    dispose(): void;
-    /**
-     * Process pending chunks sequentially.
-     * Fire-and-forget — called from pushAudio() without awaiting.
-     */
-    private drainPendingChunks;
-    private handleError;
+/**
+ * SenseVoice adapter backed by UnifiedInferenceWorker
+ *
+ * Implements SenseVoiceBackend, delegating all inference to the shared worker.
+ */
+declare class SenseVoiceUnifiedAdapter implements SenseVoiceBackend {
+    private worker;
+    private config;
+    private _isLoaded;
+    private loadedGeneration;
+    private languageId;
+    private textNormId;
+    /** Per-adapter inference queue — ensures sequential state updates. */
+    private inferenceQueue;
+    constructor(worker: UnifiedInferenceWorker, config: SenseVoiceWorkerConfig);
+    get isLoaded(): boolean;
+    get backend(): 'wasm' | null;
+    load(onProgress?: (loaded: number, total: number) => void): Promise<SenseVoiceModelInfo>;
+    transcribe(audioSamples: Float32Array): Promise<SenseVoiceResult>;
+    dispose(): Promise<void>;
+    private assertLoaded;
 }
 /**
- * BlendshapeSmoother — Per-channel critically damped spring for 52 ARKit blendshapes
+ * Wav2ArkitCpu adapter backed by UnifiedInferenceWorker
  *
- * Eliminates frame gaps between inference batches by smoothly interpolating
- * blendshape weights using critically damped springs (the game industry standard).
+ * Implements A2EBackend, delegating all inference to the shared worker.
+ */
+declare class Wav2ArkitCpuUnifiedAdapter implements A2EBackend {
+    readonly modelId: "wav2arkit_cpu";
+    readonly chunkSize: number;
+    private worker;
+    private config;
+    private _isLoaded;
+    private loadedGeneration;
+    /** Per-adapter inference queue — ensures sequential state updates. */
+    private inferenceQueue;
+    constructor(worker: UnifiedInferenceWorker, config: Wav2ArkitCpuWorkerConfig);
+    get isLoaded(): boolean;
+    get backend(): RuntimeBackend | null;
+    load(): Promise<A2EModelInfo>;
+    infer(audioSamples: Float32Array, _identityIndex?: number): Promise<A2EResult>;
+    dispose(): Promise<void>;
+    private assertLoaded;
+}
+/**
+ * Wav2Vec2 (LAM) adapter backed by UnifiedInferenceWorker
  *
- * Each of the 52 blendshape channels has its own spring with position + velocity
- * state. When a new inference frame arrives, spring targets are updated. Between
- * frames, springs continue converging toward the last target — no frozen face.
+ * Implements A2EBackend, delegating all inference to the shared worker.
+ * Used on iOS to run LAM inference off the main thread via the unified worker.
+ */
+declare class Wav2Vec2UnifiedAdapter implements A2EBackend {
+    readonly modelId: "wav2vec2";
+    readonly chunkSize: number;
+    private worker;
+    private modelUrl;
+    private externalDataUrl;
+    private numIdentityClasses;
+    private _isLoaded;
+    private loadedGeneration;
+    /** Per-adapter inference queue — ensures sequential state updates. */
+    private inferenceQueue;
+    constructor(worker: UnifiedInferenceWorker, config: {
+        modelUrl: string;
+        externalDataUrl?: string | false;
+        numIdentityClasses?: number;
+        chunkSize?: number;
+    });
+    get isLoaded(): boolean;
+    get backend(): RuntimeBackend | null;
+    load(): Promise<A2EModelInfo>;
+    infer(audioSamples: Float32Array, identityIndex?: number): Promise<A2EResult>;
+    dispose(): Promise<void>;
+    private assertLoaded;
+}
+/**
+ * Kokoro TTS inference using ONNX Runtime Web
  *
- * When inference stalls, `decayToNeutral()` sets all targets to 0, and the
- * springs smoothly close the mouth / relax the face over the halflife period.
+ * Pure ONNX pipeline for browser-based text-to-speech. No transformers.js dependency.
+ * Uses eSpeak-NG WASM for phonemization and Kokoro-82M (q8, 92MB) for synthesis.
  *
- * Math from Daniel Holden's "Spring-It-On" (Epic Games):
- * https://theorangeduck.com/page/spring-roll-call
+ * Pipeline: Text → Normalize → Phonemize (eSpeak WASM) → Tokenize → Voice Style → ONNX → Audio
  *
  * @category Inference
  *
  * @example Basic usage
  * ```typescript
- * const smoother = new BlendshapeSmoother({ halflife: 0.06 });
+ * import { KokoroTTSInference } from '@omote/core';
  *
- * // In frame loop (60fps):
- * smoother.setTarget(inferenceFrame);        // when new frame arrives
- * const smoothed = smoother.update(1/60);    // every render frame
- * applyToAvatar(smoothed);
+ * const tts = new KokoroTTSInference({ defaultVoice: 'af_heart' });
+ * await tts.load();
+ *
+ * const { audio, duration } = await tts.synthesize("Hello world");
+ * // audio: Float32Array @ 24kHz
+ * ```
+ *
+ * @example Streaming (sentence-by-sentence)
+ * ```typescript
+ * for await (const chunk of tts.stream("First sentence. Second sentence.")) {
+ *   playbackPipeline.feedBuffer(chunk.audio);
+ * }
  * ```
+ *
+ * @module inference/KokoroTTSInference
  */
-interface BlendshapeSmootherConfig {
+interface KokoroTTSConfig {
+    /** ONNX model URL (default: HF CDN q8, 92MB) */
+    modelUrl?: string;
+    /** Voice files base URL (default: HF CDN voices directory) */
+    voiceBaseUrl?: string;
+    /** Default voice (default: 'af_heart') */
+    defaultVoice?: string;
+    /** Backend preference (default: 'wasm' — WebGPU crashes on int64 input_ids) */
+    backend?: BackendPreference;
+    /** Speech speed multiplier (default: 1.0) */
+    speed?: number;
+}
+interface KokoroTTSResult {
+    /** Audio samples at 24kHz */
+    audio: Float32Array;
+    /** Duration in seconds */
+    duration: number;
+    /** Inference time in ms */
+    inferenceTimeMs: number;
+}
+interface KokoroStreamChunk {
+    /** Audio for this sentence */
+    audio: Float32Array;
+    /** Original text segment */
+    text: string;
+    /** Phonemes for this segment */
+    phonemes: string;
+    /** Duration in seconds */
+    duration: number;
+}
+interface KokoroTTSModelInfo {
+    /** Resolved backend */
+    backend: string;
+    /** Model load time in ms */
+    loadTimeMs: number;
+    /** Default voice */
+    defaultVoice: string;
+}
+interface SynthesizeOptions {
+    /** Voice to use (overrides defaultVoice) */
+    voice?: string;
+    /** Speed multiplier (overrides config speed) */
+    speed?: number;
+}
+declare class KokoroTTSInference implements TTSBackend {
+    private readonly config;
+    private readonly modelUrl;
+    private readonly voiceBaseUrl;
+    private ort;
+    private session;
+    private _backend;
+    private isLoading;
+    private poisoned;
+    private inferenceQueue;
+    private phonemizerReady;
+    private defaultVoiceLoaded;
+    /** Cached voice data (voice name → Float32Array) */
+    private loadedVoices;
+    constructor(config?: KokoroTTSConfig);
+    get isLoaded(): boolean;
+    get sampleRate(): number;
     /**
-     * Spring halflife in seconds — time for the distance to the target
-     * to reduce by half. Lower = snappier, higher = smoother.
-     *
-     * - 0.04s (40ms): Very snappy, slight jitter on fast transitions
-     * - 0.06s (60ms): Sweet spot for lip sync (default)
-     * - 0.10s (100ms): Very smooth, slight lag on fast consonants
-     * - 0: Bypass mode — passes through raw target values (no smoothing)
-     *
-     * Default: 0.06
+     * Load the ONNX model. Phonemizer WASM and default voice may be initialized lazily on first synthesis (see ensureReady).
+     * Safe to call multiple times (no-ops after first successful load).
      */
-    halflife?: number;
-}
-declare class BlendshapeSmoother {
-    private readonly halflife;
-    /** Current smoothed blendshape values */
-    private values;
-    /** Per-channel spring velocities */
-    private velocities;
-    /** Current spring targets (from latest inference frame) */
-    private targets;
-    /** Whether any target has been set */
-    private _hasTarget;
-    constructor(config?: BlendshapeSmootherConfig);
-    /** Whether a target frame has been set (false until first setTarget call) */
-    get hasTarget(): boolean;
+    load(): Promise<KokoroTTSModelInfo>;
     /**
-     * Set new target frame from inference output.
-     * Springs will converge toward these values on subsequent update() calls.
+     * Lazily initialize phonemizer and default voice on first use.
+     * Moves 100-200ms of main-thread blocking out of load() into first synthesis.
      */
-    setTarget(frame: Float32Array): void;
+    private ensureReady;
     /**
-     * Advance all 52 springs by `dt` seconds and return the smoothed frame.
-     *
-     * Call this every render frame (e.g., inside requestAnimationFrame).
-     * Returns the internal values buffer — do NOT mutate the returned array.
+     * Synthesize speech from text (one-shot, full audio output).
      *
-     * @param dt - Time step in seconds (e.g., 1/60 for 60fps)
-     * @returns Smoothed blendshape values (Float32Array of 52)
+     * @param text - Input text to synthesize
+     * @param options - Voice and speed overrides
+     * @returns Audio (Float32Array at 24kHz), duration in seconds, and inference time in ms
      */
-    update(dt: number): Float32Array;
+    synthesize(text: string, options?: SynthesizeOptions): Promise<KokoroTTSResult>;
     /**
-     * Decay all spring targets to neutral (0).
+     * Stream synthesis sentence-by-sentence (async generator).
+     * Splits text on sentence boundaries and yields audio for each.
      *
-     * Call when inference stalls (no new frames for threshold duration).
-     * The springs will smoothly close the mouth / relax the face over
-     * the halflife period rather than freezing.
+     * Compatible with both `SynthesizeOptions` (legacy) and `TTSStreamOptions` (TTSBackend).
+     *
+     * @param text - Input text (can be multiple sentences)
+     * @param options - Voice, speed, and abort signal overrides
      */
-    decayToNeutral(): void;
+    stream(text: string, options?: SynthesizeOptions & TTSStreamOptions): AsyncGenerator<KokoroStreamChunk & TTSChunk>;
     /**
-     * Reset all state (values, velocities, targets).
-     * Call when starting a new playback session.
+     * Preload a voice (fetches and caches the .bin file).
      */
-    reset(): void;
+    preloadVoice(voiceName: string): Promise<void>;
+    /**
+     * List available voice names.
+     */
+    listVoices(): string[];
+    /**
+     * Release the ONNX session and clear cached voices.
+     */
+    dispose(): Promise<void>;
+    private ensureVoice;
+    private queueInference;
+    private runInference;
+}
+/**
+ * Kokoro TTS adapter backed by UnifiedInferenceWorker
+ *
+ * Implements TTSBackend, delegating ONNX inference to the shared worker.
+ * Phonemization, tokenization, and voice loading stay on main thread (fast, <10ms).
+ * Only the heavy `session.run()` (~1-2s per sentence) goes to the worker.
+ */
+declare class KokoroTTSUnifiedAdapter implements TTSBackend {
+    private worker;
+    private readonly config;
+    private readonly modelUrl;
+    private readonly voiceBaseUrl;
+    private _isLoaded;
+    private loadedGeneration;
+    /** Per-adapter inference queue — ensures sequential state updates. */
+    private inferenceQueue;
+    private loadedVoices;
+    private phonemizerReady;
+    private defaultVoiceLoaded;
+    constructor(worker: UnifiedInferenceWorker, config?: KokoroTTSConfig);
+    get isLoaded(): boolean;
+    get sampleRate(): number;
+    load(): Promise<KokoroTTSModelInfo>;
+    stream(text: string, options?: SynthesizeOptions & TTSStreamOptions): AsyncGenerator<KokoroStreamChunk & TTSChunk>;
+    dispose(): Promise<void>;
+    private ensureVoice;
+    private assertLoaded;
+    private runWorkerInference;
+}
+/**
+ * Silero VAD adapter backed by UnifiedInferenceWorker
+ *
+ * Implements SileroVADBackend, delegating all inference to the shared worker.
+ */
+declare class SileroVADUnifiedAdapter implements SileroVADBackend {
+    private worker;
+    private config;
+    private _isLoaded;
+    private loadedGeneration;
+    private state;
+    private context;
+    private readonly chunkSize;
+    private readonly contextSize;
+    /**
+     * Per-adapter inference queue — ensures sequential state updates.
+     *
+     * The unified worker processes messages serially (single thread), but this queue
+     * guarantees per-adapter state consistency. Example: VAD LSTM state from call N
+     * must be applied before call N+1 starts. Without the queue, two rapid process()
+     * calls could both read the same stale state.
+     */
+    private inferenceQueue;
+    private preSpeechBuffer;
+    private wasSpeaking;
+    constructor(worker: UnifiedInferenceWorker, config: SileroVADConfig);
+    get isLoaded(): boolean;
+    get backend(): RuntimeBackend | null;
+    get sampleRate(): number;
+    get threshold(): number;
+    getChunkSize(): number;
+    getChunkDurationMs(): number;
+    load(): Promise<VADWorkerModelInfo>;
+    process(audioChunk: Float32Array): Promise<VADResult>;
+    reset(): Promise<void>;
+    dispose(): Promise<void>;
+    private assertLoaded;
 }
 /**
@@ -2970,6 +3393,175 @@ declare class SafariSpeechRecognition {
     private emitError;
 }
+/**
+ * Kokoro TTS Web Worker implementation
+ *
+ * Moves the heavy ONNX `session.run()` to a dedicated Web Worker to prevent
+ * main thread blocking (~1-2s per sentence on WASM). Phonemizer, tokenizer,
+ * and voice logic stay on the main thread (fast, <10ms combined).
+ *
+ * Architecture:
+ * ```
+ * Main Thread (KokoroTTSWorker):        Worker (WORKER_SCRIPT):
+ *   stream(text) →
+ *     splitSentences(text)
+ *     for each sentence:
+ *       phonemize(sentence)  → phonemes
+ *       tokenize(phonemes)   → tokens
+ *       ensureVoice()        → style
+ *       postMessage(tokens, style, speed)  ──→  session.run(feeds)
+ *       await result                       ←──  postMessage(audio)
+ *       yield {audio, text, phonemes, duration}
+ * ```
+ *
+ * @category Inference
+ *
+ * @example Basic usage
+ * ```typescript
+ * import { KokoroTTSWorker } from '@omote/core';
+ *
+ * const tts = new KokoroTTSWorker({ defaultVoice: 'af_heart' });
+ * await tts.load();
+ *
+ * for await (const chunk of tts.stream("Hello world!")) {
+ *   playbackPipeline.feedBuffer(chunk.audio);
+ * }
+ * ```
+ *
+ * @module inference/KokoroTTSWorker
+ */
+/**
+ * Kokoro TTS Worker — off-main-thread ONNX inference for non-blocking TTS.
+ *
+ * Phonemizer/tokenizer/voice logic run on the main thread (fast, <10ms).
+ * Only the heavy ONNX `session.run()` is delegated to the worker.
+ *
+ * Implements the same TTSBackend interface as KokoroTTSInference.
+ *
+ * @see KokoroTTSInference for main-thread version
+ */
+declare class KokoroTTSWorker implements TTSBackend {
+    private readonly config;
+    private readonly modelUrl;
+    private readonly voiceBaseUrl;
+    private worker;
+    private _isLoaded;
+    private isLoading;
+    private poisoned;
+    /** Serializes all worker calls (stream sentence chunks + synthesize) */
+    private inferenceQueue;
+    /** Cached voice data (voice name → Float32Array) */
+    private loadedVoices;
+    /** Pending message handlers */
+    private pendingResolvers;
+    constructor(config?: KokoroTTSConfig);
+    get isLoaded(): boolean;
+    get sampleRate(): number;
+    load(): Promise<KokoroTTSModelInfo>;
+    synthesize(text: string, options?: SynthesizeOptions): Promise<KokoroTTSResult>;
+    stream(text: string, options?: SynthesizeOptions & TTSStreamOptions): AsyncGenerator<KokoroStreamChunk & TTSChunk>;
+    preloadVoice(voiceName: string): Promise<void>;
+    listVoices(): string[];
+    dispose(): Promise<void>;
+    static isSupported(): boolean;
+    private ensureVoice;
+    private createWorker;
+    private handleWorkerMessage;
+    private sendMessage;
+    /**
+     * Queue worker inference through the serialization queue.
+     * Sends pre-computed tokens + style to worker, returns audio.
+     */
+    private runWorkerInference;
+    /**
+     * One-shot synthesis (phonemize + tokenize + worker inference).
+     */
+    private queueInference;
+}
+/**
+ * Factory function for Kokoro TTS with automatic Worker vs main thread selection
+ *
+ * Provides a unified API that automatically selects the optimal implementation:
+ * - Desktop: Uses KokoroTTSWorker (off-main-thread inference, no render hitching)
+ * - iOS: Uses KokoroTTSInference (main thread, shared ORT instance to avoid OOM)
+ *
+ * @category Inference
+ *
+ * @example Auto-detect (recommended)
+ * ```typescript
+ * import { createKokoroTTS } from '@omote/core';
+ *
+ * const tts = createKokoroTTS({ defaultVoice: 'af_heart' });
+ * await tts.load();
+ *
+ * for await (const chunk of tts.stream("Hello world!")) {
+ *   playbackPipeline.feedBuffer(chunk.audio);
+ * }
+ * ```
+ *
+ * @example Force worker
+ * ```typescript
+ * const tts = createKokoroTTS({ defaultVoice: 'af_heart', useWorker: true });
+ * ```
+ *
+ * @example Force main thread
+ * ```typescript
+ * const tts = createKokoroTTS({ defaultVoice: 'af_heart', useWorker: false });
+ * ```
+ */
+/**
+ * Configuration for the Kokoro TTS factory
+ */
+interface CreateKokoroTTSConfig extends KokoroTTSConfig, InferenceFactoryConfig {
+}
+/**
+ * Create a Kokoro TTS instance with automatic implementation selection.
+ *
+ * @param config - Factory configuration
+ * @returns A TTSBackend instance (either Worker or main thread)
+ */
+declare function createKokoroTTS(config?: CreateKokoroTTSConfig): TTSBackend;
+/** Available Kokoro v1.0 voices */
+declare const KOKORO_VOICES: {
+    readonly af_heart: "af_heart";
+    readonly af_alloy: "af_alloy";
+    readonly af_aoede: "af_aoede";
+    readonly af_bella: "af_bella";
+    readonly af_jessica: "af_jessica";
+    readonly af_kore: "af_kore";
+    readonly af_nicole: "af_nicole";
+    readonly af_nova: "af_nova";
+    readonly af_river: "af_river";
+    readonly af_sarah: "af_sarah";
+    readonly af_sky: "af_sky";
+    readonly am_adam: "am_adam";
+    readonly am_echo: "am_echo";
+    readonly am_eric: "am_eric";
+    readonly am_fenrir: "am_fenrir";
+    readonly am_liam: "am_liam";
+    readonly am_michael: "am_michael";
+    readonly am_onyx: "am_onyx";
+    readonly am_puck: "am_puck";
+    readonly am_santa: "am_santa";
+    readonly bf_alice: "bf_alice";
+    readonly bf_emma: "bf_emma";
+    readonly bf_isabella: "bf_isabella";
+    readonly bf_lily: "bf_lily";
+    readonly bm_daniel: "bm_daniel";
+    readonly bm_fable: "bm_fable";
+    readonly bm_george: "bm_george";
+    readonly bm_lewis: "bm_lewis";
+};
+type KokoroVoiceName = keyof typeof KOKORO_VOICES;
+/**
+ * List all available voice names.
+ */
+declare function listVoices(): string[];
 /**
  * Emotion - Helper for creating emotion vectors for avatar animation
  *
@@ -4555,6 +5147,123 @@ declare class FaceCompositor {
     private applyProfileArrays;
 }
+/**
+ * CharacterController — Renderer-agnostic avatar composition loop
+ *
+ * Extracted from r3f's useOmoteAvatar + useGazeTracking.
+ * Owns FaceCompositor, emotion resolution, eye angle math, head smoothing.
+ * Per-frame update: input → output. No renderer side effects (internal smoothing state is kept between frames).
+ *
+ * @category Character
+ */
+/**
+ * Convert an emotion label string or EmotionWeights object to EmotionWeights.
+ * Cached to avoid per-frame string allocation.
+ */
+declare function resolveEmotion(emotion: string | EmotionWeights | null | undefined): EmotionWeights | undefined;
+/** Simple 3D vector (renderer-agnostic) */
+interface Vec3 {
+    x: number;
+    y: number;
+    z: number;
+}
+/** Quaternion (renderer-agnostic, for head rotation) */
+interface Quat {
+    x: number;
+    y: number;
+    z: number;
+    w: number;
+}
+interface CharacterControllerConfig {
+    /** FaceCompositor configuration */
+    compositor?: FaceCompositorConfig;
+    /** Gaze tracking config */
+    gaze?: {
+        enabled?: boolean;
+        yawInfluence?: number;
+        pitchInfluence?: number;
+        smoothing?: number;
+    };
+}
+interface CharacterUpdateInput {
+    /** Time since last frame in seconds */
+    deltaTime: number;
+    /** Scaled blendshapes from pipeline frame (or null when no frame) */
+    baseBlendshapes: Float32Array | null;
+    /** Raw blendshapes before profile scaling (optional) */
+    rawBlendshapes?: Float32Array | null;
+    /** Current emotion (string preset or weights object) */
+    emotion?: string | EmotionWeights | null;
+    /** Whether the avatar is currently speaking */
+    isSpeaking: boolean;
+    /** Current conversational state */
+    state: ConversationalState;
+    /** Audio energy level (0-1, drives emphasis/gesture intensity) */
+    audioEnergy?: number;
+    /** Camera world position (renderer provides in its own coords) */
+    cameraWorldPos?: Vec3;
+    /** Head bone world position (renderer provides in its own coords) */
+    headWorldPos?: Vec3;
+    /** Head bone world quaternion (for eye gaze local-space transform) */
+    headWorldQuat?: Quat;
+    /** Current avatar Y rotation in radians (for gaze compensation) */
+    avatarRotationY?: number;
+}
+interface CharacterUpdateOutput {
+    /** 52 ARKit blendshape values, clamped [0,1] — apply to morph targets */
+    blendshapes: Float32Array;
+    /** Head rotation delta (radians) — apply to head bone */
+    headDelta: {
+        yaw: number;
+        pitch: number;
+    };
+    /** Normalized eye targets for eye blendshapes */
+    eyeTargets: {
+        x: number;
+        y: number;
+    };
+}
+declare class CharacterController {
+    private readonly _compositor;
+    private readonly gazeEnabled;
+    private readonly gazeYawInfluence;
+    private readonly gazePitchInfluence;
+    private readonly gazeSmoothing;
+    private readonly zeroBase;
+    private readonly outputBuffer;
+    private readonly compositorInput;
+    private gazeHeadYaw;
+    private gazeHeadPitch;
+    constructor(config?: CharacterControllerConfig);
+    /**
+     * Call each frame: input → output. No renderer side effects, but the controller keeps internal state between calls (see reset()).
+     *
+     * Composes A2E blendshapes, emotion, procedural life, gaze tracking
+     * into a single output frame.
+     */
+    update(input: CharacterUpdateInput): CharacterUpdateOutput;
+    /** Set emotion (string preset or weights object). */
+    setEmotion(emotion: string | EmotionWeights): void;
+    /** Update character profile at runtime. */
+    setProfile(profile: CharacterProfile): void;
+    /** Access underlying FaceCompositor for advanced use. */
+    get compositor(): FaceCompositor;
+    /** Reset all state (smoothing, life layer, emotions). */
+    reset(): void;
+    dispose(): void;
+    /**
+     * Compute normalized eye targets from camera and head positions.
+     * Pure atan2/asin math — no renderer dependency.
+     */
+    private computeEyeTargets;
+    /**
+     * Compute smoothed head rotation. Returns target yaw/pitch values.
+     * Renderer is responsible for applying these to the head bone.
+     */
+    private computeHeadGaze;
+}
 /**
  * MicLipSync - Microphone → VAD → A2E → blendshapes
  *
@@ -4617,6 +5326,7 @@ declare class MicLipSync extends EventEmitter<MicLipSyncEvents> {
     private _currentFrame;
     private _currentRawFrame;
     private profile;
+    private _firstFrameEmitted;
     private speechStartTime;
     private vadChunkSize;
     private vadBuffer;
@@ -4700,9 +5410,19 @@ interface ResponseHandler {
  * @category Orchestration
  */
-interface VoicePipelineConfig {
-    /** URLs and options for model loading */
-    models: {
+/** Shared config options for all VoicePipeline modes */
+interface VoicePipelineBaseConfig {
+    /** Pre-built backends — skip internal factory creation. Takes precedence over `models`. */
+    backends?: {
+        asr: SenseVoiceBackend;
+        lam: A2EBackend;
+        vad: SileroVADBackend;
+        tts?: TTSBackend;
+    };
+    /** External unified worker (reuse across pipelines). Takes precedence over internal creation. */
+    unifiedWorker?: UnifiedInferenceWorker;
+    /** URLs and options for model loading. Required if `backends` not provided. */
+    models?: {
         senseVoice: {
             modelUrl: string;
             tokensUrl?: string;
@@ -4720,8 +5440,6 @@ interface VoicePipelineConfig {
             preSpeechBufferChunks?: number;
         };
     };
-    /** Consumer's response handler */
-    onResponse: ResponseHandler;
     /** Per-character expression weight scaling */
     profile?: ExpressionProfile;
     /** Identity/style index for Wav2Vec2 (default: 0) */
@@ -4763,6 +5481,46 @@ interface VoicePipelineConfig {
     /** Duration of neutral fade-out (default: 250ms) */
     neutralTransitionMs?: number;
 }
+/** Cloud TTS mode: consumer handles response + audio streaming */
+interface VoicePipelineCloudConfig extends VoicePipelineBaseConfig {
+    mode: 'cloud';
+    /** Consumer's response handler (streams audio back) */
+    onResponse: ResponseHandler;
+}
+/** Local TTS mode: SDK handles synthesis internally via TTSBackend */
+interface VoicePipelineLocalConfig extends VoicePipelineBaseConfig {
+    mode: 'local';
+    /**
+     * TTS backend (e.g., KokoroTTSInference). Provide either `tts` or `ttsConfig`.
+     *
+     * When `tts` is provided, VoicePipeline uses it as-is. On iOS, this means
+     * inference runs on the main thread (may cause UI freezes).
+     *
+     * Prefer `ttsConfig` for automatic unified worker integration on iOS.
+     */
+    tts?: TTSBackend;
+    /**
+     * Kokoro TTS configuration. When provided, VoicePipeline creates the TTS
+     * internally and passes the unified worker on iOS for off-main-thread inference.
+     *
+     * Takes precedence over `tts` if both are provided.
+     */
+    ttsConfig?: {
+        defaultVoice?: string;
+        speed?: number;
+        modelUrl?: string;
+        voiceBaseUrl?: string;
+    };
+    /** Optional text transform (e.g., LLM call). Receives transcript, returns response text. */
+    onTranscript?: (text: string) => string | Promise<string>;
+}
+/** @deprecated Legacy config (no mode field), treated as cloud mode. Use `mode: 'cloud'` explicitly. */
+interface VoicePipelineLegacyConfig extends VoicePipelineBaseConfig {
+    mode?: undefined;
+    /** Consumer's response handler */
+    onResponse: ResponseHandler;
+}
+type VoicePipelineConfig = VoicePipelineCloudConfig | VoicePipelineLocalConfig | VoicePipelineLegacyConfig;
 interface VoicePipelineEvents {
     'state': VoicePipelineState;
     'loading:progress': LoadingProgress;
@@ -4787,6 +5545,7 @@ interface VoicePipelineEvents {
 }
 declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private readonly config;
+    private readonly isLocalMode;
     private _state;
     private stopped;
     private epoch;
@@ -4821,6 +5580,15 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     get sessionId(): string | null;
     constructor(config: VoicePipelineConfig);
     loadModels(): Promise<void>;
+    /**
+     * Load from pre-built backends (dependency injection path).
+     * Loads any backends that aren't loaded yet.
+     */
+    private loadFromBackends;
+    /**
+     * Load from factories (original path). Now loads SenseVoice, LAM, and VAD in parallel.
+     */
+    private loadFromFactories;
     start(): Promise<void>;
     stop(): void;
     setProfile(profile: ExpressionProfile): void;
@@ -4830,6 +5598,10 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private onSilenceDetected;
     private processEndOfSpeech;
     private callResponseHandler;
+    /** Cloud mode: delegate to consumer's onResponse handler */
+    private handleCloudResponse;
+    /** Local mode: synthesize text with TTSBackend, stream to PlaybackPipeline */
+    private handleLocalResponse;
     private handleInterruption;
     private startProgressiveTranscription;
     private stopProgressiveTranscription;
@@ -4840,4 +5612,4 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private clearSilenceTimer;
 }
-export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, type CharacterProfile, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, 
PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resetModelUrls, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
+export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, CharacterController, type CharacterControllerConfig, type CharacterProfile, type CharacterUpdateInput, type CharacterUpdateOutput, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateKokoroTTSConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceFactoryConfig, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, KOKORO_VOICES, type KokoroStreamChunk, type KokoroTTSConfig, KokoroTTSInference, type KokoroTTSModelInfo, type KokoroTTSResult, KokoroTTSUnifiedAdapter, KokoroTTSWorker, type KokoroVoiceName, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type 
LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type Quat, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type SynthesizeOptions, type TTSBackend, type TTSChunk, TTSPlayback, type TTSPlaybackConfig, type TTSPlaybackEvents, type TTSStreamOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Vec3, VoicePipeline, type VoicePipelineCloudConfig, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineLocalConfig, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, Wav2Vec2UnifiedAdapter, type WorkerHealthState, applyProfile, blendEmotions, calculatePeak, 
calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createKokoroTTS, createSenseVoice, createSileroVAD, fetchWithCache, float32ToPcm16, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, int16ToFloat32, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, listVoices as listKokoroVoices, pcm16ToFloat32, preloadModels, resampleLinear, resetModelUrls, resolveBackend, resolveEmotion, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker, ttsToPlaybackFormat };