npm - @omote/core - Versions diffs - 0.6.4 → 0.6.6 - Mend

@omote/core 0.6.4 → 0.6.6

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -2350,20 +2350,15 @@ declare class Wav2ArkitCpuInference implements A2EBackend {
 /**
  * Factory function for A2E with automatic GPU/CPU model selection
  *
- * Provides a unified API that automatically selects the optimal model:
- * - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (404MB, WASM)
- * - Chrome/Firefox/Edge: Uses Wav2Vec2Inference (192MB fp16, WebGPU)
- * - Fallback: Gracefully falls back to CPU model if GPU model fails to load
- *
- * Why two separate models?
- * Wav2Vec2 (LAM) cannot run on Safari/iOS for two reasons:
- * 1. Its dual-head transformer graph needs ~750-950MB peak during ORT session
- *    creation (graph optimization), exceeding iOS WebKit's ~1-1.5GB tab limit.
- * 2. It ships as a single 192MB .onnx file (fp16) that must load into JS heap
- *    before ORT can consume it. iOS WebKit OOMs on this allocation.
- * wav2arkit_cpu solves both: external data format (1.86MB graph + 402MB weights)
- * lets ORT load only the tiny graph, then stream weights via URL pass-through
- * directly into WASM memory. JS heap stays at ~2MB.
+ * Provides a unified API that always tries Wav2Vec2 (LAM fp16) first:
+ * - All platforms: Tries Wav2Vec2Inference (192MB fp16, external data format)
+ * - Fallback: Gracefully falls back to wav2arkit_cpu if GPU model fails to load
+ *
+ * The fp16 external data format (385KB graph + 192MB weights) enables iOS support:
+ * - URL pass-through: ORT streams weights directly into WASM memory (~2MB JS heap)
+ * - Basic graph optimization: avoids ~750-950MB peak from 'all' optimization
+ * - If iOS OOMs during session creation, A2EWithFallback catches it and loads
+ *   wav2arkit_cpu (1.86MB graph + 402MB weights) as a safe fallback.
  *
  * @category Inference
  *
@@ -4404,12 +4399,15 @@ declare const ALL_AUS: string[];
  */
 /**
- * Resolved emotion split into upper and lower face contributions
+ * Resolved emotion split into upper and lower face contributions.
+ *
+ * WARNING: Buffers are owned by EmotionResolver and are overwritten
+ * on the next resolve() call. Copy if you need to retain values.
  */
 interface ResolvedEmotion {
-    /** 52 channels — only upper face (brows, eyes, cheeks, nose) non-zero */
+    /** 52 channels — only upper face non-zero. Valid until next resolve() call. */
     upper: Float32Array;
-    /** 52 channels — only lower face (mouth, jaw) non-zero */
+    /** 52 channels — only lower face non-zero. Valid until next resolve() call. */
     lower: Float32Array;
 }
 /**
@@ -4444,6 +4442,28 @@ declare class EmotionResolver {
  * @category Face
  */
+/**
+ * Output of FaceCompositor.compose()
+ *
+ * WARNING: When using the internal output buffer (no `target` param),
+ * `blendshapes` is a shared reference that is overwritten on the next
+ * compose() call. Copy with `new Float32Array(output.blendshapes)` if
+ * you need to retain values across frames.
+ */
+interface FaceCompositorOutput {
+    /**
+     * 52 ARKit blendshape values, clamped [0,1].
+     *
+     * This buffer is reused across calls when no `target` parameter is
+     * provided to compose(). Valid until the next compose() call.
+     */
+    blendshapes: Float32Array;
+    /** Head rotation deltas in radians (from ProceduralLifeLayer) */
+    headDelta: {
+        yaw: number;
+        pitch: number;
+    };
+}
 /**
  * Per-blendshape character profile (multiplier + offset)
  *
@@ -4494,13 +4514,14 @@ interface FaceCompositorInput extends LifeLayerInput {
  *   audioEnergy: 0.5,
  * });
  *
- * // Apply output[0..51] to avatar morphTargetInfluences
+ * // Apply output.blendshapes[0..51] to avatar morphTargetInfluences
  * ```
  */
 declare class FaceCompositor {
     private readonly emotionResolver;
     private readonly lifeLayer;
     private readonly emotionSmoothing;
+    private readonly outputBuffer;
     private readonly smoothedUpper;
     private readonly smoothedLower;
     private readonly lifeBuffer;
@@ -4513,9 +4534,11 @@ declare class FaceCompositor {
      *
      * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
      * @param input - Per-frame input (deltaTime, emotion, life layer params)
-     * @returns Float32Array[52] with all values clamped to [0, 1]
+     * @param target - Optional pre-allocated output buffer (avoids per-frame allocation).
+     *   When omitted, an internal buffer is used (valid until next compose() call).
+     * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
      */
-    compose(base: Float32Array, input: FaceCompositorInput): Float32Array;
+    compose(base: Float32Array, input: FaceCompositorInput, target?: Float32Array): FaceCompositorOutput;
     /**
      * Set sticky emotion (used when input.emotion is not provided).
      */
@@ -4817,4 +4840,4 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private clearSilenceTimer;
 }
-export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, type CharacterProfile, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PlaybackPipeline, type 
PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resetModelUrls, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
+export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, type CharacterProfile, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, 
PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resetModelUrls, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };

package/dist/index.d.ts CHANGED Viewed

@@ -2350,20 +2350,15 @@ declare class Wav2ArkitCpuInference implements A2EBackend {
 /**
  * Factory function for A2E with automatic GPU/CPU model selection
  *
- * Provides a unified API that automatically selects the optimal model:
- * - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (404MB, WASM)
- * - Chrome/Firefox/Edge: Uses Wav2Vec2Inference (192MB fp16, WebGPU)
- * - Fallback: Gracefully falls back to CPU model if GPU model fails to load
- *
- * Why two separate models?
- * Wav2Vec2 (LAM) cannot run on Safari/iOS for two reasons:
- * 1. Its dual-head transformer graph needs ~750-950MB peak during ORT session
- *    creation (graph optimization), exceeding iOS WebKit's ~1-1.5GB tab limit.
- * 2. It ships as a single 192MB .onnx file (fp16) that must load into JS heap
- *    before ORT can consume it. iOS WebKit OOMs on this allocation.
- * wav2arkit_cpu solves both: external data format (1.86MB graph + 402MB weights)
- * lets ORT load only the tiny graph, then stream weights via URL pass-through
- * directly into WASM memory. JS heap stays at ~2MB.
+ * Provides a unified API that always tries Wav2Vec2 (LAM fp16) first:
+ * - All platforms: Tries Wav2Vec2Inference (192MB fp16, external data format)
+ * - Fallback: Gracefully falls back to wav2arkit_cpu if GPU model fails to load
+ *
+ * The fp16 external data format (385KB graph + 192MB weights) enables iOS support:
+ * - URL pass-through: ORT streams weights directly into WASM memory (~2MB JS heap)
+ * - Basic graph optimization: avoids ~750-950MB peak from 'all' optimization
+ * - If iOS OOMs during session creation, A2EWithFallback catches it and loads
+ *   wav2arkit_cpu (1.86MB graph + 402MB weights) as a safe fallback.
  *
  * @category Inference
  *
@@ -4404,12 +4399,15 @@ declare const ALL_AUS: string[];
  */
 /**
- * Resolved emotion split into upper and lower face contributions
+ * Resolved emotion split into upper and lower face contributions.
+ *
+ * WARNING: Buffers are owned by EmotionResolver and are overwritten
+ * on the next resolve() call. Copy if you need to retain values.
  */
 interface ResolvedEmotion {
-    /** 52 channels — only upper face (brows, eyes, cheeks, nose) non-zero */
+    /** 52 channels — only upper face non-zero. Valid until next resolve() call. */
     upper: Float32Array;
-    /** 52 channels — only lower face (mouth, jaw) non-zero */
+    /** 52 channels — only lower face non-zero. Valid until next resolve() call. */
     lower: Float32Array;
 }
 /**
@@ -4444,6 +4442,28 @@ declare class EmotionResolver {
  * @category Face
  */
+/**
+ * Output of FaceCompositor.compose()
+ *
+ * WARNING: When using the internal output buffer (no `target` param),
+ * `blendshapes` is a shared reference that is overwritten on the next
+ * compose() call. Copy with `new Float32Array(output.blendshapes)` if
+ * you need to retain values across frames.
+ */
+interface FaceCompositorOutput {
+    /**
+     * 52 ARKit blendshape values, clamped [0,1].
+     *
+     * This buffer is reused across calls when no `target` parameter is
+     * provided to compose(). Valid until the next compose() call.
+     */
+    blendshapes: Float32Array;
+    /** Head rotation deltas in radians (from ProceduralLifeLayer) */
+    headDelta: {
+        yaw: number;
+        pitch: number;
+    };
+}
 /**
  * Per-blendshape character profile (multiplier + offset)
  *
@@ -4494,13 +4514,14 @@ interface FaceCompositorInput extends LifeLayerInput {
  *   audioEnergy: 0.5,
  * });
  *
- * // Apply output[0..51] to avatar morphTargetInfluences
+ * // Apply output.blendshapes[0..51] to avatar morphTargetInfluences
  * ```
  */
 declare class FaceCompositor {
     private readonly emotionResolver;
     private readonly lifeLayer;
     private readonly emotionSmoothing;
+    private readonly outputBuffer;
     private readonly smoothedUpper;
     private readonly smoothedLower;
     private readonly lifeBuffer;
@@ -4513,9 +4534,11 @@ declare class FaceCompositor {
      *
      * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
      * @param input - Per-frame input (deltaTime, emotion, life layer params)
-     * @returns Float32Array[52] with all values clamped to [0, 1]
+     * @param target - Optional pre-allocated output buffer (avoids per-frame allocation).
+     *   When omitted, an internal buffer is used (valid until next compose() call).
+     * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
      */
-    compose(base: Float32Array, input: FaceCompositorInput): Float32Array;
+    compose(base: Float32Array, input: FaceCompositorInput, target?: Float32Array): FaceCompositorOutput;
     /**
      * Set sticky emotion (used when input.emotion is not provided).
      */
@@ -4817,4 +4840,4 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private clearSilenceTimer;
 }
-export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, type CharacterProfile, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PlaybackPipeline, type 
PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resetModelUrls, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
+export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, type CharacterProfile, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, 
PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, VoicePipeline, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineState, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resetModelUrls, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };

package/dist/index.js CHANGED Viewed

@@ -2613,7 +2613,7 @@ async function getOnnxRuntimeForPreference(preference = "auto") {
   const ort = await getOnnxRuntime(backend);
   return { ort, backend };
 }
-function getSessionOptions(backend) {
+function getSessionOptions(backend, config) {
   if (backend === "webgpu") {
     return {
       executionProviders: [
@@ -2629,7 +2629,7 @@ function getSessionOptions(backend) {
   if (isIOS()) {
     return {
       executionProviders: ["wasm"],
-      graphOptimizationLevel: "basic",
+      graphOptimizationLevel: config?.iosDisableOptimization ? "disabled" : "basic",
       enableCpuMemArena: false,
       enableMemPattern: false
     };
@@ -2896,7 +2896,7 @@ var _Wav2Vec2Inference = class _Wav2Vec2Inference {
       logger3.info("ONNX Runtime loaded", { backend: this._backend });
       const modelUrl = this.config.modelUrl;
       const dataUrl = this.config.externalDataUrl !== false ? typeof this.config.externalDataUrl === "string" ? this.config.externalDataUrl : `${modelUrl}.data` : null;
-      const sessionOptions = getSessionOptions(this._backend);
+      const sessionOptions = getSessionOptions(this._backend, { iosDisableOptimization: true });
       let isCached = false;
       if (isIOS()) {
         logger3.info("iOS: passing model URLs directly to ORT (low-memory path)", {
@@ -7739,10 +7739,11 @@ function createA2E(config = {}) {
     useCpu = false;
     logger12.info("Forcing GPU A2E model (Wav2Vec2)");
   } else {
-    useCpu = shouldUseCpuA2E();
-    logger12.info("Auto-detected A2E model", {
-      useCpu,
-      isSafari: isSafari()
+    useCpu = false;
+    logger12.info("Auto-detected A2E model: trying GPU first (fp16 external data)", {
+      isSafari: isSafari(),
+      isIOS: isIOS(),
+      fallbackOnError
     });
   }
   if (useCpu) {
@@ -7804,6 +7805,7 @@ var A2EWithFallback = class {
     }
   }
   async fallbackToCpu(reason) {
+    console.error("[A2EWithFallback] GPU\u2192CPU FALLBACK TRIGGERED. Reason:", reason);
     logger12.warn("GPU model load failed, falling back to CPU model", { reason });
     try {
       await this.implementation.dispose();
@@ -10672,10 +10674,7 @@ var EmotionResolver = class {
       if (upper[i] > 1) upper[i] = 1;
       if (lower[i] > 1) lower[i] = 1;
     }
-    return {
-      upper: new Float32Array(upper),
-      lower: new Float32Array(lower)
-    };
+    return { upper, lower };
   }
 };
@@ -10698,6 +10697,7 @@ var FaceCompositor = class {
   constructor(config) {
     this.emotionResolver = new EmotionResolver();
     // Pre-allocated buffers
+    this.outputBuffer = new Float32Array(52);
     this.smoothedUpper = new Float32Array(52);
     this.smoothedLower = new Float32Array(52);
     this.lifeBuffer = new Float32Array(52);
@@ -10715,10 +10715,12 @@ var FaceCompositor = class {
    *
    * @param base - A2E raw output (Float32Array[52], LAM_BLENDSHAPES order)
    * @param input - Per-frame input (deltaTime, emotion, life layer params)
-   * @returns Float32Array[52] with all values clamped to [0, 1]
+   * @param target - Optional pre-allocated output buffer (avoids per-frame allocation).
+   *   When omitted, an internal buffer is used (valid until next compose() call).
+   * @returns Blendshapes (Float32Array[52] clamped [0,1]) and head rotation deltas
    */
-  compose(base, input) {
-    const out = new Float32Array(52);
+  compose(base, input, target) {
+    const out = target ?? this.outputBuffer;
     out.set(base);
     const emotion = input.emotion ?? this.stickyEmotion;
     if (emotion) {
@@ -10740,7 +10742,14 @@ var FaceCompositor = class {
         out[i] *= 1 + this.smoothedLower[i] * bilabialSuppress;
       }
     }
-    this.lifeLayer.updateToArray(input.deltaTime, input, this.lifeBuffer);
+    const lifeResult = this.lifeLayer.update(input.deltaTime, input);
+    this.lifeBuffer.fill(0);
+    for (const [name, value] of Object.entries(lifeResult.blendshapes)) {
+      const idx = BS_INDEX2.get(name);
+      if (idx !== void 0) {
+        this.lifeBuffer[idx] = value;
+      }
+    }
     for (let i = 0; i < 52; i++) {
       if (IS_EYE_CHANNEL[i]) {
         out[i] = this.lifeBuffer[i];
@@ -10755,7 +10764,7 @@ var FaceCompositor = class {
       if (out[i] < 0) out[i] = 0;
       else if (out[i] > 1) out[i] = 1;
     }
-    return out;
+    return { blendshapes: out, headDelta: lifeResult.headDelta };
   }
   /**
    * Set sticky emotion (used when input.emotion is not provided).
@@ -11042,6 +11051,7 @@ var VoicePipeline = class extends EventEmitter {
           new Promise((r) => setTimeout(() => r("timeout"), timeoutMs))
         ]);
         if (lamLoadResult === "timeout") {
+          console.error(`[VoicePipeline] LAM TIMEOUT after ${timeoutMs}ms \u2014 forcing CPU fallback`);
           logger19.warn(`LAM GPU load timed out after ${timeoutMs}ms, falling back to CPU`);
           await lam.dispose();
           lam = createA2E({