@omote/core 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@ import { EventEmitter, OmoteEvents, AnimationEvent } from './events/index.js';
2
2
  export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
3
3
  import { InferenceSession, Tensor, Env } from 'onnxruntime-common';
4
4
  export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.js';
5
+ export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
5
6
 
6
7
  /**
7
8
  * Microphone capture - renderer-agnostic audio input
@@ -758,6 +759,483 @@ declare class SyncedAudioPipeline extends EventEmitter<SyncedAudioPipelineEvents
758
759
  dispose(): void;
759
760
  }
760
761
 
762
+ /**
763
+ * Emotion to ARKit Blendshape Mapper
764
+ *
765
+ * Converts Emotion2VecInference output to upper face ARKit blendshapes for
766
+ * expressive avatar animation. Maps 4 emotion categories (neutral, happy, angry, sad)
767
+ * to 11 upper face blendshapes (brows, eyes, cheeks).
768
+ *
769
+ * Supports two blend modes:
770
+ * - 'dominant': Uses only the strongest emotion (simpler, more stable)
771
+ * - 'weighted': Blends all emotions by probability (more nuanced, e.g., bittersweet)
772
+ *
773
+ * Also supports energy modulation to scale emotion intensity by audio energy,
774
+ * making expressions stronger during emphasized speech.
775
+ *
776
+ * @example Basic usage
777
+ * ```typescript
778
+ * import { EmotionToBlendshapeMapper } from '@omote/core';
779
+ * import { Emotion2VecInference } from '@omote/core';
780
+ *
781
+ * const emotion = new Emotion2VecInference({ modelUrl: '/models/emotion.onnx' });
782
+ * const mapper = new EmotionToBlendshapeMapper();
783
+ *
784
+ * // Process emotion frame
785
+ * const result = await emotion.infer(audioSamples);
786
+ * const blendshapes = mapper.mapFrame(result.dominant);
787
+ *
788
+ * // Apply to avatar
789
+ * for (const [name, value] of Object.entries(blendshapes)) {
790
+ * avatar.setBlendshape(name, value);
791
+ * }
792
+ * ```
793
+ *
794
+ * @example Weighted blending for nuanced expressions
795
+ * ```typescript
796
+ * const mapper = new EmotionToBlendshapeMapper({
797
+ * blendMode: 'weighted',
798
+ * minBlendProbability: 0.1,
799
+ * });
800
+ *
801
+ * // Frame with mixed emotions: { happy: 0.6, sad: 0.3, neutral: 0.1 }
802
+ * // Result: bittersweet expression (smiling but worried brow)
803
+ * const blendshapes = mapper.mapFrame(emotionFrame);
804
+ * ```
805
+ *
806
+ * @example Energy-modulated emotion
807
+ * ```typescript
808
+ * import { AudioEnergyAnalyzer } from '@omote/core';
809
+ *
810
+ * const energyAnalyzer = new AudioEnergyAnalyzer();
811
+ * const mapper = new EmotionToBlendshapeMapper({ energyModulation: true });
812
+ *
813
+ * // In animation loop
814
+ * function animate(audioChunk: Float32Array, emotionFrame: EmotionFrame) {
815
+ * const { energy } = energyAnalyzer.analyze(audioChunk);
816
+ * mapper.mapFrame(emotionFrame, energy); // Louder = stronger emotion
817
+ * mapper.update(16);
818
+ * applyToAvatar(mapper.getCurrentBlendshapes());
819
+ * }
820
+ * ```
821
+ *
822
+ * @module animation
823
+ */
824
+ declare const EMOTION2VEC_LABELS: readonly ["neutral", "happy", "angry", "sad"];
825
+ type Emotion2VecLabel = (typeof EMOTION2VEC_LABELS)[number];
826
+ interface EmotionFrame {
827
+ /** Primary emotion label */
828
+ emotion: Emotion2VecLabel;
829
+ /** Confidence for primary emotion (0-1) */
830
+ confidence: number;
831
+ /** All emotion probabilities */
832
+ probabilities: Record<Emotion2VecLabel, number>;
833
+ }
834
+ /**
835
+ * Upper face ARKit blendshape names (11 total)
836
+ *
837
+ * These blendshapes control the upper face (brows, eyes, cheeks) and are
838
+ * driven by emotion detection, complementing the mouth blendshapes from
839
+ * LAM lip sync.
840
+ */
841
+ declare const UPPER_FACE_BLENDSHAPES: readonly ["browDownLeft", "browDownRight", "browInnerUp", "browOuterUpLeft", "browOuterUpRight", "eyeSquintLeft", "eyeSquintRight", "eyeWideLeft", "eyeWideRight", "cheekSquintLeft", "cheekSquintRight"];
842
+ type UpperFaceBlendshapeName = (typeof UPPER_FACE_BLENDSHAPES)[number];
843
+ /**
844
+ * Upper face blendshape values (0-1 for each)
845
+ */
846
+ type UpperFaceBlendshapes = Record<UpperFaceBlendshapeName, number>;
847
+ /**
848
+ * Blend mode for combining emotions
849
+ * - 'dominant': Use only the strongest emotion (default, more stable)
850
+ * - 'weighted': Blend all emotions by probability (more nuanced)
851
+ */
852
+ type EmotionBlendMode = 'dominant' | 'weighted';
853
+ /**
854
+ * Emotion to ARKit blendshape mapping
855
+ *
856
+ * Based on Paul Ekman's FACS (Facial Action Coding System) research:
857
+ *
858
+ * - Happy (AU6+AU12): Cheek raise + lip corner pull (Duchenne smile)
859
+ * Upper face: cheekSquint (AU6) + slight eyeSquint from genuine smile
860
+ *
861
+ * - Angry (AU4+AU5+AU7+AU23): Brow lower + eye wide + lid tighten + lip press
862
+ * Upper face: browDown (AU4) + eyeWide (AU5) + eyeSquint (AU7) creates the "glare"
863
+ *
864
+ * - Sad (AU1+AU4+AU15): Inner brow raise + brow furrow + lip corner depress
865
+ * Upper face: browInnerUp (AU1) + browDown (AU4) creates the worried/sad brow
866
+ *
867
+ * - Neutral: All zeros (no expression overlay)
868
+ *
869
+ * @see https://imotions.com/blog/learning/research-fundamentals/facial-action-coding-system/
870
+ * @see https://melindaozel.com/arkit-to-facs-cheat-sheet/
871
+ */
872
+ declare const EMOTION_ARKIT_MAP: Record<Emotion2VecLabel, Partial<UpperFaceBlendshapes>>;
873
+ /**
874
+ * Configuration for EmotionToBlendshapeMapper
875
+ */
876
+ interface EmotionBlendshapeConfig {
877
+ /**
878
+ * Smoothing factor for exponential moving average (0-1)
879
+ * Lower = slower, smoother transitions
880
+ * Higher = faster, more responsive
881
+ * @default 0.15
882
+ */
883
+ smoothingFactor?: number;
884
+ /**
885
+ * Minimum confidence threshold for emotion to take effect
886
+ * Emotions below this confidence are treated as neutral
887
+ * @default 0.3
888
+ */
889
+ confidenceThreshold?: number;
890
+ /**
891
+ * Global intensity multiplier for all blendshapes (0-2)
892
+ * @default 1.0
893
+ */
894
+ intensity?: number;
895
+ /**
896
+ * Blend mode for combining emotions
897
+ * - 'dominant': Use only the strongest emotion (default)
898
+ * - 'weighted': Blend all emotions by probability
899
+ * @default 'dominant'
900
+ */
901
+ blendMode?: EmotionBlendMode;
902
+ /**
903
+ * Minimum probability for an emotion to contribute in weighted blend mode
904
+ * Emotions with probability below this are ignored
905
+ * @default 0.1
906
+ */
907
+ minBlendProbability?: number;
908
+ /**
909
+ * Enable energy modulation - scale emotion intensity by audio energy
910
+ * When enabled, louder speech produces stronger expressions
911
+ * @default false
912
+ */
913
+ energyModulation?: boolean;
914
+ /**
915
+ * Minimum energy scale when energy modulation is enabled (0-1)
916
+ * At zero audio energy, emotion intensity is scaled by this factor
917
+ * @default 0.3
918
+ */
919
+ minEnergyScale?: number;
920
+ /**
921
+ * Maximum energy scale when energy modulation is enabled (0-2)
922
+ * At maximum audio energy, emotion intensity is scaled by this factor
923
+ * @default 1.0
924
+ */
925
+ maxEnergyScale?: number;
926
+ }
927
+ /**
928
+ * EmotionToBlendshapeMapper
929
+ *
930
+ * Converts emotion detection output to upper face ARKit blendshapes.
931
+ * Provides smooth transitions between emotion states using exponential
932
+ * moving average interpolation.
933
+ *
934
+ * Supports two blend modes:
935
+ * - 'dominant': Uses only the strongest emotion
936
+ * - 'weighted': Blends all emotions by probability for nuanced expressions
937
+ *
938
+ * Also supports energy modulation to scale emotion intensity by audio energy.
939
+ */
940
+ declare class EmotionToBlendshapeMapper {
941
+ private config;
942
+ private targetBlendshapes;
943
+ private currentBlendshapes;
944
+ private currentEnergy;
945
+ /**
946
+ * Create a new EmotionToBlendshapeMapper
947
+ *
948
+ * @param config - Optional configuration
949
+ */
950
+ constructor(config?: EmotionBlendshapeConfig);
951
+ /**
952
+ * Map an emotion frame to target blendshapes
953
+ *
954
+ * This sets the target values that the mapper will smoothly interpolate
955
+ * towards. Call update() each frame to apply smoothing.
956
+ *
957
+ * @param frame - Emotion frame from Emotion2VecInference
958
+ * @param audioEnergy - Optional audio energy (0-1) for energy modulation
959
+ * @returns Target upper face blendshapes (before smoothing)
960
+ */
961
+ mapFrame(frame: EmotionFrame, audioEnergy?: number): UpperFaceBlendshapes;
962
+ /**
963
+ * Map using dominant emotion only (original behavior)
964
+ */
965
+ private mapFrameDominant;
966
+ /**
967
+ * Map using weighted blend of all emotions by probability
968
+ * Creates more nuanced expressions (e.g., bittersweet = happy + sad)
969
+ */
970
+ private mapFrameWeighted;
971
+ /**
972
+ * Apply energy modulation to scale emotion intensity by audio energy
973
+ * Louder speech = stronger expressions
974
+ */
975
+ private applyEnergyModulation;
976
+ /**
977
+ * Apply smoothing to interpolate current values towards target
978
+ *
979
+ * Uses exponential moving average:
980
+ * current = current + smoothingFactor * (target - current)
981
+ *
982
+ * @param _deltaMs - Delta time in milliseconds (reserved for future time-based smoothing)
983
+ */
984
+ update(_deltaMs: number): void;
985
+ /**
986
+ * Get current smoothed blendshape values
987
+ *
988
+ * @returns Current upper face blendshapes (after smoothing)
989
+ */
990
+ getCurrentBlendshapes(): UpperFaceBlendshapes;
991
+ /**
992
+ * Reset mapper to neutral state
993
+ *
994
+ * Sets both target and current blendshapes to zero.
995
+ */
996
+ reset(): void;
997
+ /**
998
+ * Get current configuration
999
+ */
1000
+ getConfig(): Required<EmotionBlendshapeConfig>;
1001
+ /**
1002
+ * Update configuration
1003
+ *
1004
+ * @param config - Partial configuration to update
1005
+ */
1006
+ setConfig(config: Partial<EmotionBlendshapeConfig>): void;
1007
+ }
1008
+
1009
+ /**
1010
+ * FullFacePipeline - Combined LAM lip sync + Emotion upper face pipeline
1011
+ *
1012
+ * Orchestrates full-face animation by combining:
1013
+ * 1. LAM lip sync (52 ARKit blendshapes) via audio-first scheduling
1014
+ * 2. Emotion labels (from backend LLM or `setEmotionLabel()`) for upper face
1015
+ * 3. AudioEnergyAnalyzer for prosody-driven fallback when no emotion label is set
1016
+ *
1017
+ * Architecture: Audio-First, LAM-Background (same as SyncedAudioPipeline)
1018
+ * - Audio chunks are scheduled for playback immediately (never waits for LAM)
1019
+ * - LAM inference runs in background without blocking the audio path
1020
+ * - Lip sync starts ~1 second after audio (LAM needs 16000 samples, i.e. 1 s at the default 16 kHz sample rate, to infer)
1021
+ *
1022
+ * Merge Strategy:
1023
+ * - Lower face (41 blendshapes): 100% from LAM (mouth, jaw, tongue, etc.)
1024
+ * - Upper face (11 blendshapes): Emotion overlay with LAM as subtle fallback
1025
+ * Formula: emotion * emotionBlendFactor + lam * lamBlendFactor
1026
+ *
1027
+ * Emotion Sources (in priority order):
1028
+ * 1. `setEmotionLabel()` — explicit label from backend LLM (recommended)
1029
+ * 2. Prosody fallback — subtle brow movement from audio energy (automatic)
1030
+ *
1031
+ * @category Audio
1032
+ *
1033
+ * @example Basic usage
1034
+ * ```typescript
1035
+ * import { FullFacePipeline } from '@omote/core';
1036
+ *
1037
+ * const pipeline = new FullFacePipeline({
1038
+ * lam,
1039
+ * emotionBlendFactor: 0.8,
1040
+ * lamBlendFactor: 0.2,
1041
+ * });
1042
+ * await pipeline.initialize();
1043
+ *
1044
+ * pipeline.on('full_frame_ready', (frame) => {
1045
+ * applyToAvatar(frame.blendshapes);
1046
+ * });
1047
+ *
1048
+ * pipeline.start();
1049
+ * pipeline.setEmotionLabel('happy'); // From backend LLM
1050
+ * await pipeline.onAudioChunk(audioData);
1051
+ * ```
1052
+ */
1053
+
1054
+ /**
1055
+ * Configuration for FullFacePipeline
1056
+ */
1057
+ interface FullFacePipelineOptions {
1058
+ /** Sample rate in Hz (default: 16000) */
1059
+ sampleRate?: number;
1060
+ /** Target chunk duration in ms for coalescing (default: 200) */
1061
+ chunkTargetMs?: number;
1062
+ /**
1063
+ * Audio playback delay in ms before first audio plays.
1064
+ * Gives LAM inference time to pre-compute blendshapes.
1065
+ * Default: auto-detected from lam.backend (50ms WebGPU, 350ms WASM).
1066
+ */
1067
+ audioDelayMs?: number;
1068
+ /** LAM inference engine */
1069
+ lam: LipSyncBackend;
1070
+ /**
1071
+ * Emotion blend factor for upper face blendshapes (0-1)
1072
+ * Higher values give more weight to the emotion-driven upper face values
1073
+ * @default 0.8
1074
+ */
1075
+ emotionBlendFactor?: number;
1076
+ /**
1077
+ * LAM blend factor for upper face blendshapes (0-1)
1078
+ * Provides subtle fallback from LAM when emotion is weak
1079
+ * @default 0.2
1080
+ */
1081
+ lamBlendFactor?: number;
1082
+ }
1083
+ /**
1084
+ * Full face frame with merged blendshapes and emotion data
1085
+ */
1086
+ interface FullFaceFrame {
1087
+ /** Merged 52 ARKit blendshapes (lower face from LAM; upper face blended from emotion and LAM) */
1088
+ blendshapes: Float32Array;
1089
+ /** Original LAM blendshapes (52) */
1090
+ lamBlendshapes: Float32Array;
1091
+ /** Emotion-driven upper face blendshapes (11) */
1092
+ emotionBlendshapes: UpperFaceBlendshapes;
1093
+ /** Raw emotion frame data */
1094
+ emotion: EmotionFrame | null;
1095
+ /** AudioContext timestamp for this frame */
1096
+ timestamp: number;
1097
+ }
1098
+ /**
1099
+ * Events emitted by FullFacePipeline
1100
+ */
1101
+ interface FullFacePipelineEvents {
1102
+ /** New merged frame ready for display */
1103
+ full_frame_ready: FullFaceFrame;
1104
+ /** Raw LAM frame ready (for debugging/monitoring) */
1105
+ lam_frame_ready: Float32Array;
1106
+ /** Emotion frame ready (for debugging/monitoring) */
1107
+ emotion_frame_ready: EmotionFrame;
1108
+ /** Playback has completed */
1109
+ playback_complete: void;
1110
+ /** First frame ready, playback starting */
1111
+ playback_start: number;
1112
+ /** Error occurred */
1113
+ error: Error;
1114
+ /** Index signature for EventEmitter compatibility */
1115
+ [key: string]: unknown;
1116
+ }
1117
+ /**
1118
+ * FullFacePipeline - Unified LAM + Emotion animation pipeline
1119
+ *
1120
+ * Audio-first design matching SyncedAudioPipeline:
1121
+ * - Audio is scheduled immediately (never waits for LAM)
1122
+ * - LAM runs in background (fire-and-forget)
1123
+ * - Emotion from setEmotionLabel() or prosody fallback
1124
+ */
1125
+ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
1126
+ private readonly options;
1127
+ private scheduler;
1128
+ private coalescer;
1129
+ private lamPipeline;
1130
+ private emotionMapper;
1131
+ private energyAnalyzer;
1132
+ private playbackStarted;
1133
+ private monitorInterval;
1134
+ private frameAnimationId;
1135
+ private lastEmotionFrame;
1136
+ private currentAudioEnergy;
1137
+ private lastNewFrameTime;
1138
+ private lastKnownLamFrame;
1139
+ private staleWarningEmitted;
1140
+ private static readonly STALE_FRAME_THRESHOLD_MS;
1141
+ private emotionBlendFactor;
1142
+ private lamBlendFactor;
1143
+ constructor(options: FullFacePipelineOptions);
1144
+ /**
1145
+ * Initialize the pipeline
1146
+ */
1147
+ initialize(): Promise<void>;
1148
+ /**
1149
+ * Set emotion label from backend (e.g., LLM response emotion).
1150
+ *
1151
+ * Converts a natural language emotion label into an EmotionFrame
1152
+ * that drives upper face blendshapes for the duration of the utterance.
1153
+ *
1154
+ * Supported labels: happy, excited, joyful, sad, melancholic, angry,
1155
+ * frustrated, neutral, etc.
1156
+ *
1157
+ * @param label - Emotion label string (case-insensitive)
1158
+ */
1159
+ setEmotionLabel(label: string): void;
1160
+ /**
1161
+ * Clear any set emotion label.
1162
+ * Falls back to prosody-only upper face animation.
1163
+ */
1164
+ clearEmotionLabel(): void;
1165
+ /**
1166
+ * Start a new playback session
1167
+ *
1168
+ * Resets all state and prepares for incoming audio chunks.
1169
+ * Audio will be scheduled immediately as chunks arrive (no buffering).
1170
+ */
1171
+ start(): void;
1172
+ /**
1173
+ * Receive audio chunk from network
1174
+ *
1175
+ * Audio-first design: schedules audio immediately, LAM runs in background.
1176
+ * This prevents LAM inference (50-300ms) from blocking audio scheduling.
1177
+ *
1178
+ * @param chunk - Uint8Array containing Int16 PCM audio
1179
+ */
1180
+ onAudioChunk(chunk: Uint8Array): Promise<void>;
1181
+ /**
1182
+ * Get emotion frame for current animation.
1183
+ *
1184
+ * Priority:
1185
+ * 1. Explicit emotion label from setEmotionLabel()
1186
+ * 2. Prosody fallback: subtle brow movement from audio energy
1187
+ */
1188
+ private getEmotionFrame;
1189
+ /**
1190
+ * Merge LAM blendshapes with emotion upper face blendshapes
1191
+ */
1192
+ mergeBlendshapes(lamFrame: Float32Array, emotionFrame: EmotionFrame | null, audioEnergy?: number): {
1193
+ merged: Float32Array;
1194
+ emotionBlendshapes: UpperFaceBlendshapes;
1195
+ };
1196
+ /**
1197
+ * Start frame animation loop
1198
+ */
1199
+ private startFrameLoop;
1200
+ /**
1201
+ * End of audio stream
1202
+ */
1203
+ end(): Promise<void>;
1204
+ /**
1205
+ * Stop playback immediately, with a smooth fade-out
1206
+ */
1207
+ stop(fadeOutMs?: number): Promise<void>;
1208
+ /**
1209
+ * Start monitoring for playback completion
1210
+ */
1211
+ private startMonitoring;
1212
+ /**
1213
+ * Stop monitoring
1214
+ */
1215
+ private stopMonitoring;
1216
+ /**
1217
+ * Get current pipeline state (for debugging/monitoring)
1218
+ */
1219
+ getState(): {
1220
+ playbackStarted: boolean;
1221
+ coalescerFill: number;
1222
+ lamFill: number;
1223
+ queuedLAMFrames: number;
1224
+ emotionLabel: "neutral" | "happy" | "angry" | "sad" | null;
1225
+ currentAudioEnergy: number;
1226
+ currentTime: number;
1227
+ playbackEndTime: number;
1228
+ };
1229
+ /**
1230
+ * Check if an explicit emotion label is currently set
1231
+ */
1232
+ get hasEmotionLabel(): boolean;
1233
+ /**
1234
+ * Cleanup resources
1235
+ */
1236
+ dispose(): void;
1237
+ }
1238
+
761
1239
  /**
762
1240
  * Lazy ONNX Runtime loader with conditional WebGPU/WASM bundle loading
763
1241
  *
@@ -1179,6 +1657,8 @@ declare class Wav2Vec2Inference implements LipSyncBackend {
1179
1657
  private isLoading;
1180
1658
  private numIdentityClasses;
1181
1659
  private inferenceQueue;
1660
+ private poisoned;
1661
+ private static readonly INFERENCE_TIMEOUT_MS;
1182
1662
  constructor(config: Wav2Vec2InferenceConfig);
1183
1663
  /**
1184
1664
  * Check if WebGPU is available and working
@@ -1187,6 +1667,8 @@ declare class Wav2Vec2Inference implements LipSyncBackend {
1187
1667
  static isWebGPUAvailable: typeof isWebGPUAvailable;
1188
1668
  get backend(): 'webgpu' | 'wasm' | null;
1189
1669
  get isLoaded(): boolean;
1670
+ /** True if inference timed out and the session is permanently unusable */
1671
+ get isSessionPoisoned(): boolean;
1190
1672
  /**
1191
1673
  * Load the ONNX model
1192
1674
  */
@@ -4069,4 +4551,157 @@ declare class EmphasisDetector {
4069
4551
  reset(): void;
4070
4552
  }
4071
4553
 
4072
- export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, 
SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes };
4554
+ /**
4555
+ * ProceduralLifeLayer - Renderer-agnostic procedural animation system
4556
+ *
4557
+ * Outputs per-frame blendshape values and head deltas for organic life-like
4558
+ * animation. No Three.js, no React, no R3F — just math.
4559
+ *
4560
+ * Implements research-based eye behavior, blinks, gaze breaks, microsaccades,
4561
+ * breathing/postural sway, and simplex noise-driven brow drift.
4562
+ *
4563
+ * Research sources:
4564
+ * - Blink frequency: 15-20/min (every 3-4s), PMC4043155
4565
+ * - Saccade latency: ~200ms, duration 20-200ms
4566
+ * - Microsaccades: ~1/second, amplitude 0.02-0.05, Scholarpedia
4567
+ * - Fixation duration: 200-350ms, Nature Scientific Reports
4568
+ * - Brow noise: NVIDIA Audio2Face, Unreal MetaHuman layered procedural animation
4569
+ *
4570
+ * @category Animation
4571
+ *
4572
+ * @example
4573
+ * ```typescript
4574
+ * import { ProceduralLifeLayer } from '@omote/core';
4575
+ *
4576
+ * const lifeLayer = new ProceduralLifeLayer();
4577
+ *
4578
+ * // In animation loop:
4579
+ * const output = lifeLayer.update(delta, {
4580
+ * eyeTargetX: normalizedX, // -1..1 from camera math
4581
+ * eyeTargetY: normalizedY,
4582
+ * audioEnergy: energy, // 0-1 from AudioEnergyAnalyzer
4583
+ * isSpeaking: true,
4584
+ * });
4585
+ *
4586
+ * // Apply blendshapes to mesh
4587
+ * for (const [name, value] of Object.entries(output.blendshapes)) {
4588
+ * const idx = mesh.morphTargetDictionary?.[name];
4589
+ * if (idx !== undefined) mesh.morphTargetInfluences![idx] = value;
4590
+ * }
4591
+ *
4592
+ * // Apply head delta to head bone
4593
+ * headBone.rotation.y += output.headDelta.yaw;
4594
+ * headBone.rotation.x += output.headDelta.pitch;
4595
+ * ```
4596
+ */
4597
+ /**
4598
+ * Configuration for ProceduralLifeLayer
4599
+ */
4600
+ interface LifeLayerConfig {
4601
+ /** Seconds between blinks [min, max]. Default: [2.5, 6] */
4602
+ blinkIntervalRange?: [number, number];
4603
+ /** Seconds between gaze breaks [min, max]. Default: [3, 8] */
4604
+ gazeBreakIntervalRange?: [number, number];
4605
+ /** Gaze break deviation range [min, max]. Default: [0.15, 0.4] */
4606
+ gazeBreakAmplitudeRange?: [number, number];
4607
+ /** Eye micro-motion noise amplitude (0 to disable). Default: 0.06 */
4608
+ eyeNoiseAmplitude?: number;
4609
+ /** Base simplex noise amplitude for brow drift. Default: 0.30 */
4610
+ browNoiseAmplitude?: number;
4611
+ /** Multiplier applied to brow noise amplitude while speaking. Default: 2.0 */
4612
+ browNoiseSpeechMultiplier?: number;
4613
+ /** Breathing rate in Hz (0.25 = 15 breaths/min). Default: 0.25 */
4614
+ breathingRate?: number;
4615
+ /** Postural sway amplitude in radians. Default: 0.002 */
4616
+ posturalSwayAmplitude?: number;
4617
+ /** Max eye movement from center (0-1). Default: 0.8 */
4618
+ eyeMaxDeviation?: number;
4619
+ /** Eye smoothing factor (higher = faster response). Default: 15 */
4620
+ eyeSmoothing?: number;
4621
+ }
4622
+ /**
4623
+ * Per-frame input to the life layer
4624
+ */
4625
+ interface LifeLayerInput {
4626
+ /** Normalized eye target X: -1 (left) to 1 (right). Consumer computes from camera. */
4627
+ eyeTargetX?: number;
4628
+ /** Normalized eye target Y: -1 (down) to 1 (up). Consumer computes from camera. */
4629
+ eyeTargetY?: number;
4630
+ /** Audio energy 0-1 (from AudioEnergyAnalyzer). Drives brow noise amplitude. */
4631
+ audioEnergy?: number;
4632
+ /** Whether avatar is speaking. Multiplies brow noise amplitude. */
4633
+ isSpeaking?: boolean;
4634
+ }
4635
+ /**
4636
+ * Per-frame output from the life layer
4637
+ */
4638
+ interface LifeLayerOutput {
4639
+ /** Blendshape values to SET directly on mesh (eyes, brows, cheeks). */
4640
+ blendshapes: Record<string, number>;
4641
+ /** Head rotation deltas in radians. Consumer adds to head bone rotation. */
4642
+ headDelta: {
4643
+ yaw: number;
4644
+ pitch: number;
4645
+ };
4646
+ }
4647
+ /**
4648
+ * ProceduralLifeLayer - Renderer-agnostic procedural animation
4649
+ *
4650
+ * Generates per-frame blendshape values and head rotation deltas
4651
+ * for natural eye behavior, blinks, brow movement, and breathing.
4652
+ */
4653
+ declare class ProceduralLifeLayer {
4654
+ private blinkIntervalRange;
4655
+ private gazeBreakIntervalRange;
4656
+ private gazeBreakAmplitudeRange;
4657
+ private eyeNoiseAmplitude;
4658
+ private browNoiseAmplitude;
4659
+ private browNoiseSpeechMultiplier;
4660
+ private breathingRate;
4661
+ private posturalSwayAmplitude;
4662
+ private eyeMaxDeviation;
4663
+ private eyeSmoothing;
4664
+ private blinkTimer;
4665
+ private blinkInterval;
4666
+ private blinkPhase;
4667
+ private blinkProgress;
4668
+ private asymmetryRight;
4669
+ private smoothedBlinkLeft;
4670
+ private smoothedBlinkRight;
4671
+ private smoothedEyeX;
4672
+ private smoothedEyeY;
4673
+ private eyeNoiseTime;
4674
+ private gazeBreakTimer;
4675
+ private gazeBreakInterval;
4676
+ private gazeBreakPhase;
4677
+ private gazeBreakProgress;
4678
+ private gazeBreakTargetX;
4679
+ private gazeBreakTargetY;
4680
+ private gazeBreakCurrentX;
4681
+ private gazeBreakCurrentY;
4682
+ private microMotionTime;
4683
+ private breathingPhase;
4684
+ private noiseTime;
4685
+ private previousEnergy;
4686
+ private emphasisLevel;
4687
+ constructor(config?: LifeLayerConfig);
4688
+ /**
4689
+ * Update the life layer and produce output for this frame.
4690
+ *
4691
+ * @param delta - Time since last frame in seconds
4692
+ * @param input - Per-frame input (eye target, audio energy, speaking state)
4693
+ * @returns Blendshape values and head rotation deltas
4694
+ */
4695
+ update(delta: number, input?: LifeLayerInput): LifeLayerOutput;
4696
+ /**
4697
+ * Reset all internal state to initial values.
4698
+ */
4699
+ reset(): void;
4700
+ private updateBlinks;
4701
+ private getBlinkValues;
4702
+ private getEyeMicroMotion;
4703
+ private updateGazeBreaks;
4704
+ private updateBrowNoise;
4705
+ }
4706
+
4707
+ export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, 
supportsVADWorker, symmetrizeBlendshapes };