npm - @omote/core - Versions diffs - 0.6.2 → 0.6.6 - Mend

@omote/core 0.6.2 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md CHANGED

@@ -35,11 +35,7 @@ The most common use case: feed TTS audio chunks and get back 52 ARKit blendshape
 import { FullFacePipeline, createA2E } from '@omote/core';
 // 1. Create A2E backend (auto-detects GPU vs CPU)
-const lam = createA2E({
-  gpuModelUrl: '/models/lam-wav2vec2.onnx',
-  cpuModelUrl: '/models/wav2arkit_cpu.onnx',
-  mode: 'auto',
-});
+const lam = createA2E(); // auto-detects GPU vs CPU, fetches from HF CDN (192MB fp16)
 await lam.load();
 // 2. Create pipeline with expression profile
@@ -72,12 +68,7 @@ Auto-detects platform: Chrome/Edge/Android use WebGPU, Safari/iOS use WASM CPU f
 ```typescript
 import { createA2E } from '@omote/core';
-const a2e = createA2E({
-  gpuModelUrl: '/models/lam-wav2vec2.onnx',       // 384MB, WebGPU
-  cpuModelUrl: '/models/wav2arkit_cpu.onnx',        // 404MB, WASM
-  mode: 'auto',            // 'auto' | 'gpu' | 'cpu'
-  fallbackOnError: true,   // GPU failure → auto-switch to CPU
-});
+const a2e = createA2E(); // auto-detects: GPU (192MB fp16) or CPU (404MB WASM)
 await a2e.load();
 const { blendshapes } = await a2e.infer(audioSamples); // Float32Array (16kHz)
@@ -89,7 +80,7 @@ const { blendshapes } = await a2e.infer(audioSamples); // Float32Array (16kHz)
 ```typescript
 import { Wav2Vec2Inference, LAM_BLENDSHAPES } from '@omote/core';
-const lam = new Wav2Vec2Inference({ modelUrl: '/models/lam-wav2vec2.onnx' });
+const lam = new Wav2Vec2Inference({ modelUrl: '/models/model_fp16.onnx' });
 await lam.load();
 const { blendshapes } = await lam.infer(audioSamples);
@@ -317,7 +308,7 @@ Place models in your public assets directory:
 ```
 public/models/
-  lam-wav2vec2.onnx              # A2E lip sync — WebGPU (384MB)
+  model_fp16.onnx                 # A2E lip sync — WebGPU (192MB fp16, from omote-ai/lam-a2e)
   wav2arkit_cpu.onnx              # A2E lip sync — WASM fallback (1.86MB graph)
   wav2arkit_cpu.onnx.data         # A2E lip sync — WASM fallback (402MB weights)
   sensevoice/model.int8.onnx      # SenseVoice ASR (239MB)

package/dist/index.d.mts CHANGED

@@ -380,7 +380,7 @@ declare function isSafari(): boolean;
 /**
  * Recommend using CPU-optimized A2E model (wav2arkit_cpu)
  *
- * All iOS browsers use WebKit and have tight memory limits — the 384MB
+ * All iOS browsers use WebKit and have tight memory limits — the 192MB fp16
  * LAM model causes silent crashes. wav2arkit_cpu uses URL pass-through
  * (ORT fetches the 402MB weights directly into WASM, no JS heap copy).
  *
@@ -427,8 +427,8 @@ declare function shouldUseServerA2E(): boolean;
 /**
  * Common interface for audio-to-expression (A2E) inference backends
  *
- * Both Wav2Vec2Inference (GPU, 384MB) and Wav2ArkitCpuInference (CPU, 404MB)
- * implement this interface, allowing SyncedAudioPipeline and LAMPipeline to
+ * Both Wav2Vec2Inference (GPU, 192MB fp16) and Wav2ArkitCpuInference (CPU, 404MB)
+ * implement this interface, allowing FullFacePipeline and A2EProcessor to
  * work with either model transparently.
  *
  * @category Inference
@@ -461,7 +461,7 @@ interface A2EResult {
  * Common interface for A2E (audio-to-expression) inference engines
  *
  * Implemented by:
- * - Wav2Vec2Inference (WebGPU/WASM, 384MB, ASR + A2E)
+ * - Wav2Vec2Inference (WebGPU/WASM, 192MB fp16, A2E)
  * - Wav2ArkitCpuInference (WASM-only, 404MB, A2E only)
  */
 interface A2EBackend {
@@ -1616,7 +1616,9 @@ interface SileroVADBackend {
  *
  * Extends SileroVADConfig with worker-specific options.
  */
-interface SileroVADFactoryConfig extends SileroVADConfig {
+interface SileroVADFactoryConfig extends Omit<SileroVADConfig, 'modelUrl'> {
+    /** Path or URL to the ONNX model. Default: HuggingFace CDN */
+    modelUrl?: string;
     /**
      * Force worker usage (true), main thread (false), or auto-detect (undefined).
      *
@@ -1689,7 +1691,7 @@ declare function supportsVADWorker(): boolean;
  * const vadMain = createSileroVAD({ modelUrl: '/models/silero-vad.onnx', useWorker: false });
  * ```
  */
-declare function createSileroVAD(config: SileroVADFactoryConfig): SileroVADBackend;
+declare function createSileroVAD(config?: SileroVADFactoryConfig): SileroVADBackend;
 /**
  * Web Worker-based wav2arkit_cpu lip sync inference
@@ -2012,8 +2014,8 @@ interface SenseVoiceBackend {
  * Configuration for the SenseVoice factory
  */
 interface CreateSenseVoiceConfig {
-    /** Path or URL to model.int8.onnx (239MB) */
-    modelUrl: string;
+    /** Path or URL to model.int8.onnx (239MB). Default: HuggingFace CDN */
+    modelUrl?: string;
     /** Path or URL to tokens.txt vocabulary file (default: sibling of modelUrl) */
     tokensUrl?: string;
     /** Language hint (default: 'auto') */
@@ -2040,7 +2042,7 @@ interface CreateSenseVoiceConfig {
  * @param config - Factory configuration
  * @returns A SenseVoiceBackend instance (either Worker or main thread)
  */
-declare function createSenseVoice(config: CreateSenseVoiceConfig): SenseVoiceBackend;
+declare function createSenseVoice(config?: CreateSenseVoiceConfig): SenseVoiceBackend;
 /**
  * Shared blendshape constants and utilities for lip sync inference
@@ -2075,12 +2077,10 @@ declare const ARKIT_BLENDSHAPES: readonly ["browDownLeft", "browDownRight", "bro
 declare function lerpBlendshapes(current: Float32Array | number[], target: Float32Array | number[], factor?: number): number[];
 /**
- * Unified Wav2Vec2 inference engine for Audio-to-Expression + ASR
+ * Wav2Vec2 inference engine for Audio-to-Expression (A2E)
  *
  * Runs entirely in the browser using WebGPU or WASM.
- * Takes raw 16kHz audio and outputs:
- * - 52 ARKit blendshapes (lip sync)
- * - 32-token CTC logits (speech recognition)
+ * Takes raw 16kHz audio and outputs 52 ARKit blendshapes for lip sync.
  *
  * @category Inference
  *
@@ -2088,14 +2088,12 @@ declare function lerpBlendshapes(current: Float32Array | number[], target: Float
  * ```typescript
  * import { Wav2Vec2Inference } from '@omote/core';
  *
- * const wav2vec = new Wav2Vec2Inference({ modelUrl: '/models/unified_wav2vec2_asr_a2e.onnx' });
+ * const wav2vec = new Wav2Vec2Inference({ modelUrl: '/models/model.onnx' });
  * await wav2vec.load();
  *
  * // Process 1 second of audio (16kHz = 16000 samples)
  * const result = await wav2vec.infer(audioSamples);
- *
  * console.log('Blendshapes:', result.blendshapes); // [30, 52] for 30fps
- * console.log('ASR text:', result.text); // Decoded transcription
  * ```
  */
@@ -2128,21 +2126,16 @@ interface ModelInfo {
     outputNames: string[];
 }
-/** CTC vocabulary (32 tokens from wav2vec2-base-960h) */
+/**
+ * CTC vocabulary (32 tokens from wav2vec2-base-960h)
+ * @deprecated ASR is handled by SenseVoice. This will be removed in a future release.
+ */
 declare const CTC_VOCAB: string[];
 interface Wav2Vec2Result {
     /** Blendshape weights [frames, 52] - 30fps */
     blendshapes: Float32Array[];
-    /** Raw CTC logits [frames, 32] - 50fps */
-    asrLogits: Float32Array[];
-    /** Decoded text from CTC */
-    text: string;
-    /** Number of blendshape frames (30fps) — alias for numA2EFrames */
+    /** Number of blendshape frames (30fps) */
     numFrames: number;
-    /** Number of A2E frames (30fps) */
-    numA2EFrames: number;
-    /** Number of ASR frames (50fps) */
-    numASRFrames: number;
     /** Inference time in ms */
     inferenceTimeMs: number;
 }
@@ -2180,10 +2173,6 @@ declare class Wav2Vec2Inference implements A2EBackend {
      * Audio will be zero-padded or truncated to chunkSize samples.
      */
     infer(audioSamples: Float32Array, identityIndex?: number): Promise<Wav2Vec2Result>;
-    /**
-     * Decode CTC logits to text using greedy decoding
-     */
-    private decodeCTC;
     /**
      * Queue inference to serialize ONNX session calls
      */
@@ -2198,10 +2187,85 @@ declare class Wav2Vec2Inference implements A2EBackend {
     dispose(): Promise<void>;
 }
+/**
+ * Default and user-configurable model URLs for all ONNX models
+ *
+ * Out of the box, models are served from HuggingFace CDN (`/resolve/main/`
+ * endpoint with `Access-Control-Allow-Origin: *`). For production apps that
+ * need faster or more reliable delivery, call {@link configureModelUrls} once
+ * at startup to point any or all models at your own CDN.
+ *
+ * @category Inference
+ *
+ * @example Use HuggingFace defaults (zero-config)
+ * ```typescript
+ * import { createA2E } from '@omote/core';
+ * const a2e = createA2E(); // fetches from HuggingFace CDN
+ * ```
+ *
+ * @example Self-host on your own CDN
+ * ```typescript
+ * import { configureModelUrls, createA2E } from '@omote/core';
+ *
+ * configureModelUrls({
+ *   lam: 'https://cdn.example.com/models/model_fp16.onnx',
+ *   senseVoice: 'https://cdn.example.com/models/sensevoice.int8.onnx',
+ *   // omitted keys keep HuggingFace defaults
+ * });
+ *
+ * const a2e = createA2E(); // now fetches from your CDN
+ * ```
+ */
+/** Model URL keys that can be configured */
+type ModelUrlKey = 'lam' | 'wav2arkitCpu' | 'senseVoice' | 'sileroVad';
+/**
+ * Resolved model URLs — user overrides take priority, HuggingFace CDN is fallback.
+ *
+ * All SDK factories (`createA2E`, `createSenseVoice`, `createSileroVAD`) and
+ * orchestrators (`VoicePipeline`) read from this object. Call
+ * {@link configureModelUrls} before constructing any pipelines to point
+ * models at your own CDN.
+ */
+declare const DEFAULT_MODEL_URLS: Readonly<Record<ModelUrlKey, string>>;
+/**
+ * Configure custom model URLs. Overrides persist for the lifetime of the page.
+ * Omitted keys keep their HuggingFace CDN defaults.
+ *
+ * Call this **once** at app startup, before constructing any pipelines.
+ *
+ * @example Self-host all models
+ * ```typescript
+ * configureModelUrls({
+ *   lam: 'https://cdn.example.com/models/model_fp16.onnx',
+ *   wav2arkitCpu: 'https://cdn.example.com/models/wav2arkit_cpu.onnx',
+ *   senseVoice: 'https://cdn.example.com/models/sensevoice.int8.onnx',
+ *   sileroVad: 'https://cdn.example.com/models/silero-vad.onnx',
+ * });
+ * ```
+ *
+ * @example Override only one model
+ * ```typescript
+ * configureModelUrls({
+ *   lam: '/models/model_fp16.onnx', // self-hosted, same origin
+ * });
+ * ```
+ */
+declare function configureModelUrls(urls: Partial<Record<ModelUrlKey, string>>): void;
+/**
+ * Reset all model URL overrides back to HuggingFace CDN defaults.
+ * Mainly useful for testing.
+ */
+declare function resetModelUrls(): void;
+/**
+ * Get the immutable HuggingFace CDN URLs (ignoring any overrides).
+ * Useful for documentation or fallback logic.
+ */
+declare const HF_CDN_URLS: Readonly<Record<ModelUrlKey, string>>;
 /**
  * CPU-optimized lip sync inference using wav2arkit_cpu model
  *
- * A Safari/iOS-compatible alternative to Wav2Vec2Inference (384MB) designed
+ * A Safari/iOS-compatible alternative to Wav2Vec2Inference (192MB fp16) designed
  * for platforms where WebGPU crashes due to ONNX Runtime JSEP bugs.
  *
  * The model uses ONNX external data format:
@@ -2286,43 +2350,30 @@ declare class Wav2ArkitCpuInference implements A2EBackend {
 /**
  * Factory function for A2E with automatic GPU/CPU model selection
  *
- * Provides a unified API that automatically selects the optimal model:
- * - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (404MB, WASM)
- * - Chrome/Firefox/Edge: Uses Wav2Vec2Inference (384MB, WebGPU)
- * - Fallback: Gracefully falls back to CPU model if GPU model fails to load
- *
- * Why two separate models?
- * Wav2Vec2 (LAM) cannot run on Safari/iOS for two reasons:
- * 1. Its dual-head transformer graph needs ~750-950MB peak during ORT session
- *    creation (graph optimization), exceeding iOS WebKit's ~1-1.5GB tab limit.
- * 2. It ships as a single 384MB .onnx file that must load into JS heap before
- *    ORT can consume it. iOS WebKit OOMs on this allocation.
- * wav2arkit_cpu solves both: external data format (1.86MB graph + 402MB weights)
- * lets ORT load only the tiny graph, then stream weights via URL pass-through
- * directly into WASM memory. JS heap stays at ~2MB.
+ * Provides a unified API that always tries Wav2Vec2 (LAM fp16) first:
+ * - All platforms: Tries Wav2Vec2Inference (192MB fp16, external data format)
+ * - Fallback: Gracefully falls back to wav2arkit_cpu if GPU model fails to load
+ *
+ * The fp16 external data format (385KB graph + 192MB weights) enables iOS support:
+ * - URL pass-through: ORT streams weights directly into WASM memory (~2MB JS heap)
+ * - Basic graph optimization: avoids ~750-950MB peak from 'all' optimization
+ * - If iOS OOMs during session creation, A2EWithFallback catches it and loads
+ *   wav2arkit_cpu (1.86MB graph + 402MB weights) as a safe fallback.
  *
  * @category Inference
  *
- * @example Auto-detect (recommended)
+ * @example Auto-detect (recommended, zero-config)
  * ```typescript
  * import { createA2E } from '@omote/core';
  *
- * const a2e = createA2E({
- *   gpuModelUrl: '/models/unified_wav2vec2_asr_a2e.onnx',
- *   cpuModelUrl: '/models/wav2arkit_cpu.onnx',
- * });
- *
+ * const a2e = createA2E(); // uses HF CDN defaults (192MB fp16 GPU, 404MB CPU fallback)
  * await a2e.load();
  * const { blendshapes } = await a2e.infer(audioSamples);
  * ```
  *
  * @example Force CPU model
  * ```typescript
- * const a2e = createA2E({
- *   gpuModelUrl: '/models/unified_wav2vec2_asr_a2e.onnx',
- *   cpuModelUrl: '/models/wav2arkit_cpu.onnx',
- *   mode: 'cpu',
- * });
+ * const a2e = createA2E({ mode: 'cpu' });
  * ```
  */
@@ -2330,8 +2381,8 @@ declare class Wav2ArkitCpuInference implements A2EBackend {
  * Configuration for the A2E factory
  */
 interface CreateA2EConfig {
-    /** URL for the GPU model (Wav2Vec2, used on Chrome/Firefox/Edge) */
-    gpuModelUrl: string;
+    /** URL for the GPU model (Wav2Vec2, used on Chrome/Firefox/Edge). Default: HuggingFace CDN */
+    gpuModelUrl?: string;
     /**
      * URL for GPU model external data file (.onnx.data weights).
      * Default: `${gpuModelUrl}.data`
@@ -2339,8 +2390,8 @@ interface CreateA2EConfig {
      * Set to `false` to skip external data loading (single-file models only).
      */
     gpuExternalDataUrl?: string | false;
-    /** URL for the CPU model (wav2arkit_cpu, used on Safari/iOS) */
-    cpuModelUrl: string;
+    /** URL for the CPU model (wav2arkit_cpu, used on Safari/iOS). Default: HuggingFace CDN */
+    cpuModelUrl?: string;
     /**
      * Model selection mode:
      * - 'auto': Safari/iOS -> CPU, everything else -> GPU (default)
@@ -2382,7 +2433,7 @@ interface CreateA2EConfig {
  * @param config - Factory configuration
  * @returns An A2EBackend instance (either GPU or CPU model)
  */
-declare function createA2E(config: CreateA2EConfig): A2EBackend;
+declare function createA2E(config?: CreateA2EConfig): A2EBackend;
 /**
  * A2EProcessor — Engine-agnostic audio-to-expression processor
@@ -4111,10 +4162,12 @@ declare class EmphasisDetector {
  * breathing/postural sway, and simplex noise-driven brow drift.
  *
  * Research sources:
- * - Blink frequency: 15-20/min (every 3-4s), PMC4043155
+ * - Blink frequency: log-normal IBI (mean=5.97s, SD(log)=0.89), PMC3565584
+ * - Blink shape: asymmetric (92ms close, 242ms open, 3:1 ratio), PMC4043155
  * - Saccade latency: ~200ms, duration 20-200ms
  * - Microsaccades: ~1/second, amplitude 0.02-0.05, Scholarpedia
  * - Fixation duration: 200-350ms, Nature Scientific Reports
+ * - Conversational gaze: Kendon (1967), Argyle & Cook (1976)
  * - Brow noise: NVIDIA Audio2Face, Unreal MetaHuman layered procedural animation
  *
  * @category Animation
@@ -4131,6 +4184,7 @@ declare class EmphasisDetector {
  *   eyeTargetY: normalizedY,
  *   audioEnergy: energy,     // 0-1 from AudioEnergyAnalyzer
  *   isSpeaking: true,
+ *   state: 'speaking',       // conversational state for gaze behavior
  * });
  *
  * // Apply blendshapes to mesh
@@ -4169,6 +4223,8 @@ interface LifeLayerConfig {
     /** Eye smoothing factor (higher = faster response). Default: 15 */
     eyeSmoothing?: number;
 }
+/** Conversational state for state-dependent gaze behavior */
+type ConversationalState = 'idle' | 'listening' | 'thinking' | 'speaking';
 /**
  * Per-frame input to the life layer
  */
@@ -4181,6 +4237,8 @@ interface LifeLayerInput {
     audioEnergy?: number;
     /** Whether avatar is speaking. Multiplies brow noise amplitude. */
     isSpeaking?: boolean;
+    /** Conversational state for gaze behavior (idle/listening/thinking/speaking) */
+    state?: ConversationalState;
 }
 /**
  * Per-frame output from the life layer
@@ -4202,6 +4260,7 @@ interface LifeLayerOutput {
  */
 declare class ProceduralLifeLayer {
     private blinkIntervalRange;
+    private useLogNormalBlinks;
     private gazeBreakIntervalRange;
     private gazeBreakAmplitudeRange;
     private eyeNoiseAmplitude;
@@ -4229,6 +4288,7 @@ declare class ProceduralLifeLayer {
     private gazeBreakTargetY;
     private gazeBreakCurrentX;
     private gazeBreakCurrentY;
+    private currentState;
     private microMotionTime;
     private breathingPhase;
     private noiseTime;
@@ -4243,17 +4303,258 @@ declare class ProceduralLifeLayer {
      * @returns Blendshape values and head rotation deltas
      */
     update(delta: number, input?: LifeLayerInput): LifeLayerOutput;
+    /**
+     * Write life layer output directly to a Float32Array[52] in LAM_BLENDSHAPES order.
+     *
+     * Includes micro-jitter (0.4% amplitude simplex noise on all channels) to
+     * break uncanny stillness on undriven channels.
+     *
+     * @param delta - Time since last frame in seconds
+     * @param input - Per-frame input
+     * @param out - Pre-allocated Float32Array(52) to write into
+     */
+    updateToArray(delta: number, input: LifeLayerInput, out: Float32Array): void;
     /**
      * Reset all internal state to initial values.
      */
     reset(): void;
+    /**
+     * Sample next blink interval.
+     * Uses log-normal distribution (PMC3565584) when using default config,
+     * or uniform random when custom blinkIntervalRange is provided.
+     */
+    private nextBlinkInterval;
     private updateBlinks;
     private getBlinkValues;
     private getEyeMicroMotion;
+    /**
+     * Get active gaze parameters — uses state-dependent params when
+     * conversational state is provided, otherwise falls back to config ranges.
+     */
+    private getActiveGazeParams;
     private updateGazeBreaks;
     private updateBrowNoise;
 }
+/**
+ * FACS (Facial Action Coding System) to ARKit Blendshape Mapping
+ *
+ * Two static lookup tables that decompose emotions into FACS Action Units,
+ * then map AUs to ARKit blendshapes. Based on Ekman's FACS research.
+ *
+ * @category Face
+ */
+/**
+ * A single FACS Action Unit activation within an emotion
+ */
+interface AUActivation {
+    /** FACS Action Unit identifier (e.g. 'AU6', 'AU12') */
+    au: string;
+    /** Activation intensity 0-1 */
+    intensity: number;
+    /** Facial region: upper (brows/eyes/cheeks) or lower (mouth/jaw) */
+    region: 'upper' | 'lower';
+}
+/**
+ * Table 1: Emotion → FACS Action Units
+ *
+ * Maps each of the 10 SDK emotion channels to their FACS AU combinations
+ * with intensity and upper/lower face region tags.
+ *
+ * Sources:
+ * - Ekman & Friesen (1978) FACS Manual
+ * - Ekman (2003) Emotions Revealed
+ * - Lucey et al. (2010) Extended Cohn-Kanade dataset
+ */
+declare const EMOTION_TO_AU: Record<EmotionName, AUActivation[]>;
+/**
+ * Table 2: FACS Action Unit → ARKit Blendshapes
+ *
+ * Maps each AU to one or more ARKit blendshape channels with weight.
+ *
+ * Sources:
+ * - Apple ARKit face tracking documentation
+ * - Melinda Ozel's ARKit-to-FACS cheat sheet
+ */
+declare const AU_TO_ARKIT: Record<string, {
+    blendshape: string;
+    weight: number;
+}[]>;
+/**
+ * All AU identifiers referenced by EMOTION_TO_AU (for validation)
+ */
+declare const ALL_AUS: string[];
+/**
+ * EmotionResolver — Resolves EmotionWeights → split upper/lower face Float32Array[52]
+ *
+ * Uses FACS decomposition (EMOTION_TO_AU → AU_TO_ARKIT) to produce
+ * anatomically correct blendshape contributions, split by facial region
+ * for the FaceCompositor's modulation strategy:
+ *   - Upper face: additive overlay (independent of speech)
+ *   - Lower face: modulates speech output
+ *
+ * @category Face
+ */
+/**
+ * Resolved emotion split into upper and lower face contributions.
+ *
+ * WARNING: Buffers are owned by EmotionResolver and are overwritten
+ * on the next resolve() call. Copy if you need to retain values.
+ */
+interface ResolvedEmotion {
+    /** 52 channels — only upper face non-zero. Valid until next resolve() call. */
+    upper: Float32Array;
+    /** 52 channels — only lower face non-zero. Valid until next resolve() call. */
+    lower: Float32Array;
+}
+/**
+ * Resolves EmotionWeights into upper/lower face blendshape arrays
+ * using FACS Action Unit decomposition.
+ */
+declare class EmotionResolver {
+    private readonly upperBuffer;
+    private readonly lowerBuffer;
+    /**
+     * Resolve emotion weights to upper/lower face blendshape contributions.
+     *
+     * @param weights - Emotion channel weights from EmotionController
+     * @param intensity - Global intensity multiplier (0-2). Default: 1.0
+     * @returns Upper and lower face blendshape arrays (52 channels each)
+     */
+    resolve(weights: EmotionWeights, intensity?: number): ResolvedEmotion;
+}
+/**
+ * FaceCompositor — 5-stage signal processing chain for facial animation
+ *
+ * Composes A2E lip sync, emotion modulation, procedural life, and character
+ * profile into a single Float32Array[52] per frame.
+ *
+ * ```
+ * BASE (A2E) → EMOTION MODULATION → PROCEDURAL LIFE → CHARACTER PROFILE → OUTPUT [0,1]
+ * ```
+ *
+ * Replaces manual blendshape merging in consumer code with a single `compose()` call.
+ *
+ * @category Face
+ */
+/**
+ * Output of FaceCompositor.compose()
+ *
+ * WARNING: When using the internal output buffer (no `target` param),
+ * `blendshapes` is a shared reference that is overwritten on the next
+ * compose() call. Copy with `new Float32Array(output.blendshapes)` if
+ * you need to retain values across frames.
+ */
+interface FaceCompositorOutput {
+    /**
+     * 52 ARKit blendshape values, clamped [0,1].
+     *
+     * This buffer is reused across calls when no `target` parameter is
+     * provided to compose(). Valid until the next compose() call.
+     */
+    blendshapes: Float32Array;
+    /** Head rotation deltas in radians (from ProceduralLifeLayer) */
+    headDelta: {
+        yaw: number;
+        pitch: number;
+    };
+}
+/**
+ * Per-blendshape character profile (multiplier + offset)
+ *
+ * Superset of ExpressionProfile — gives per-channel control instead of per-group.
+ */
+interface CharacterProfile {
+    /** Per-blendshape multiplier (default: all 1.0) */
+    multiplier?: Partial<Record<string, number>>;
+    /** Per-blendshape offset (default: all 0.0) */
+    offset?: Partial<Record<string, number>>;
+}
+/**
+ * Configuration for FaceCompositor
+ */
+interface FaceCompositorConfig {
+    /** ProceduralLifeLayer instance (compositor creates default if omitted) */
+    lifeLayer?: ProceduralLifeLayer;
+    /** Character profile: per-BS multiplier + offset */
+    profile?: CharacterProfile;
+    /** Emotion smoothing factor per frame (0-1). Default: 0.12 */
+    emotionSmoothing?: number;
+}
+/**
+ * Per-frame input to the compositor
+ */
+interface FaceCompositorInput extends LifeLayerInput {
+    /** Delta time in seconds */
+    deltaTime: number;
+    /** Current emotion weights (from EmotionController.emotion or manual) */
+    emotion?: EmotionWeights;
+    /** Emotion intensity multiplier (0-2). Default: 1.0 */
+    emotionIntensity?: number;
+}
+/**
+ * FaceCompositor — 5-stage facial animation signal chain.
+ *
+ * @example
+ * ```typescript
+ * import { FaceCompositor, createA2E } from '@omote/core';
+ *
+ * const compositor = new FaceCompositor();
+ *
+ * // In animation loop:
+ * const output = compositor.compose(a2eFrame, {
+ *   deltaTime: 0.016,
+ *   emotion: { joy: 0.8 },
+ *   isSpeaking: true,
+ *   audioEnergy: 0.5,
+ * });
+ *
+ * // Apply output.blendshapes[0..51] to avatar morphTargetInfluences
+ * ```
+ */
+declare class FaceCompositor {
+    private readonly emotionResolver;
+    private readonly lifeLayer;
+    private readonly emotionSmoothing;
+    private readonly outputBuffer;
+    private readonly smoothedUpper;
+    private readonly smoothedLower;
+    private readonly lifeBuffer;
+    private readonly multiplier;
+    private readonly offset;
+    private stickyEmotion;
+    constructor(config?: FaceCompositorConfig);
+    /**
+     * Compose a single output frame from the 5-stage signal chain.
+     *
+     * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
+     * @param input - Per-frame input (deltaTime, emotion, life layer params)
+     * @param target - Optional pre-allocated output buffer (avoids per-frame allocation).
+     *   When omitted, an internal buffer is used (valid until next compose() call).
+     * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
+     */
+    compose(base: Float32Array, input: FaceCompositorInput, target?: Float32Array): FaceCompositorOutput;
+    /**
+     * Set sticky emotion (used when input.emotion is not provided).
+     */
+    setEmotion(weights: EmotionWeights): void;
+    /**
+     * Update character profile at runtime.
+     */
+    setProfile(profile: CharacterProfile): void;
+    /**
+     * Reset all smoothing state and life layer.
+     */
+    reset(): void;
+    /** Expand partial profile maps into dense Float32Arrays */
+    private applyProfileArrays;
+}
 /**
  * MicLipSync - Microphone → VAD → A2E → blendshapes
  *
@@ -4539,4 +4840,4 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private clearSilenceTimer;
 }
-export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ARKIT_BLENDSHAPES, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
+export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, type CharacterProfile, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resetModelUrls, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };