npm - @omote/core - Versions diffs - 0.4.3 → 0.4.5 - Mend

@omote/core 0.4.3 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.mts CHANGED

@@ -1,4 +1,4 @@
-import { EventEmitter, OmoteEvents, AnimationEvent } from './events/index.mjs';
+import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.mjs';
 export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
 import { InferenceSession, Tensor, Env } from 'onnxruntime-common';
 export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.mjs';
@@ -1412,6 +1412,8 @@ declare class SenseVoiceInference {
     private _backend;
     private isLoading;
     private inferenceQueue;
+    private poisoned;
+    private static readonly INFERENCE_TIMEOUT_MS;
     private tokenMap;
     private negMean;
     private invStddev;
@@ -1755,6 +1757,8 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
     private _backend;
     private isLoading;
     private inferenceQueue;
+    private poisoned;
+    private static readonly INFERENCE_TIMEOUT_MS;
     constructor(config: Wav2ArkitCpuConfig);
     get backend(): RuntimeBackend | null;
     get isLoaded(): boolean;
@@ -1946,7 +1950,7 @@ interface VADModelInfo {
 /**
  * Result from a single VAD inference
  */
-interface VADResult$1 {
+interface VADResult {
     /** Speech probability (0-1) */
     probability: number;
     /** Whether speech is detected (probability > threshold) */
@@ -2027,7 +2031,7 @@ declare class SileroVADInference {
      * @param audioChunk - Float32Array of exactly chunkSize samples (512 for 16kHz, 256 for 8kHz)
      * @returns VAD result with speech probability
      */
-    process(audioChunk: Float32Array): Promise<VADResult$1>;
+    process(audioChunk: Float32Array): Promise<VADResult>;
     /**
      * Process audio and detect speech segments
      *
@@ -2043,10 +2047,6 @@ declare class SileroVADInference {
         /** Padding to add before/after speech in ms (default: 30) */
         speechPadMs?: number;
     }): Promise<SpeechSegment[]>;
-    /**
-     * Calculate RMS energy of audio chunk
-     */
-    private calculateRMS;
     /**
      * Queue inference to serialize ONNX session calls
      */
@@ -2057,6 +2057,37 @@ declare class SileroVADInference {
     dispose(): Promise<void>;
 }
+/**
+ * Silero VAD Web Worker implementation
+ *
+ * Runs Silero VAD inference in a dedicated Web Worker to prevent main thread blocking.
+ * Uses inline worker script (Blob URL pattern) to avoid separate file deployment.
+ *
+ * Key design decisions:
+ * - WASM backend only (WebGPU doesn't work in Workers)
+ * - LSTM state serialized as Float32Array (Tensors can't cross worker boundary)
+ * - Audio copied (not transferred) to retain main thread access for pre-speech buffer
+ * - ONNX Runtime loaded from CDN in worker (no bundler complications)
+ *
+ * @category Inference
+ *
+ * @example Basic usage
+ * ```typescript
+ * import { SileroVADWorker } from '@omote/core';
+ *
+ * const vad = new SileroVADWorker({
+ *   modelUrl: '/models/silero-vad.onnx'
+ * });
+ * await vad.load();
+ *
+ * // Process 32ms chunks (512 samples at 16kHz)
+ * const result = await vad.process(audioChunk);
+ * if (result.isSpeech) {
+ *   console.log('Speech detected!', result.probability);
+ * }
+ * ```
+ */
 /**
  * Configuration for Silero VAD Worker
  */
@@ -2091,25 +2122,7 @@ interface VADWorkerModelInfo {
     sampleRate: number;
     chunkSize: number;
 }
-/**
- * Result from a single VAD inference
- */
-interface VADResult {
-    /** Speech probability (0-1) */
-    probability: number;
-    /** Whether speech is detected (probability > threshold) */
-    isSpeech: boolean;
-    /** Inference time in milliseconds */
-    inferenceTimeMs: number;
-    /**
-     * Pre-speech audio chunks (only present on first speech detection).
-     * These are the N chunks immediately before VAD triggered, useful for
-     * capturing the beginning of speech that occurred before detection.
-     *
-     * Only populated when transitioning from silence to speech.
-     */
-    preSpeechChunks?: Float32Array[];
-}
 /**
  * Silero VAD Worker - Voice Activity Detection in a Web Worker
  *
@@ -2257,7 +2270,7 @@ interface SileroVADBackend {
      * @param audioChunk - Float32Array of exactly chunkSize samples
      * @returns VAD result with speech probability
      */
-    process(audioChunk: Float32Array): Promise<VADResult$1>;
+    process(audioChunk: Float32Array): Promise<VADResult>;
     /**
      * Reset state for new audio stream
      */
@@ -2791,10 +2804,7 @@ interface ConversationMessage {
     /** Audio duration if applicable (ms) */
     audioDurationMs?: number;
 }
-/**
- * Session state
- */
-type AISessionState = 'idle' | 'listening' | 'thinking' | 'speaking' | 'interrupted' | 'error' | 'disconnected';
 /**
  * Events emitted by AI adapters
  */
@@ -3106,7 +3116,6 @@ declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements
      * Falls back to simple RMS if VAD not available
      */
     private detectVoiceActivity;
-    private int16ToFloat32;
     private base64ToArrayBuffer;
     private addToHistory;
     private handleDisconnect;
@@ -4704,4 +4713,4 @@ declare class ProceduralLifeLayer {
     private updateBrowNoise;
 }
-export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, 
supportsVADWorker, symmetrizeBlendshapes };
+export { type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, 
symmetrizeBlendshapes };

package/dist/index.d.ts CHANGED

@@ -1,4 +1,4 @@
-import { EventEmitter, OmoteEvents, AnimationEvent } from './events/index.js';
+import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.js';
 export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
 import { InferenceSession, Tensor, Env } from 'onnxruntime-common';
 export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.js';
@@ -1412,6 +1412,8 @@ declare class SenseVoiceInference {
     private _backend;
     private isLoading;
     private inferenceQueue;
+    private poisoned;
+    private static readonly INFERENCE_TIMEOUT_MS;
     private tokenMap;
     private negMean;
     private invStddev;
@@ -1755,6 +1757,8 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
     private _backend;
     private isLoading;
     private inferenceQueue;
+    private poisoned;
+    private static readonly INFERENCE_TIMEOUT_MS;
     constructor(config: Wav2ArkitCpuConfig);
     get backend(): RuntimeBackend | null;
     get isLoaded(): boolean;
@@ -1946,7 +1950,7 @@ interface VADModelInfo {
 /**
  * Result from a single VAD inference
  */
-interface VADResult$1 {
+interface VADResult {
     /** Speech probability (0-1) */
     probability: number;
     /** Whether speech is detected (probability > threshold) */
@@ -2027,7 +2031,7 @@ declare class SileroVADInference {
      * @param audioChunk - Float32Array of exactly chunkSize samples (512 for 16kHz, 256 for 8kHz)
      * @returns VAD result with speech probability
      */
-    process(audioChunk: Float32Array): Promise<VADResult$1>;
+    process(audioChunk: Float32Array): Promise<VADResult>;
     /**
      * Process audio and detect speech segments
      *
@@ -2043,10 +2047,6 @@ declare class SileroVADInference {
         /** Padding to add before/after speech in ms (default: 30) */
         speechPadMs?: number;
     }): Promise<SpeechSegment[]>;
-    /**
-     * Calculate RMS energy of audio chunk
-     */
-    private calculateRMS;
     /**
      * Queue inference to serialize ONNX session calls
      */
@@ -2057,6 +2057,37 @@ declare class SileroVADInference {
     dispose(): Promise<void>;
 }
+/**
+ * Silero VAD Web Worker implementation
+ *
+ * Runs Silero VAD inference in a dedicated Web Worker to prevent main thread blocking.
+ * Uses inline worker script (Blob URL pattern) to avoid separate file deployment.
+ *
+ * Key design decisions:
+ * - WASM backend only (WebGPU doesn't work in Workers)
+ * - LSTM state serialized as Float32Array (Tensors can't cross worker boundary)
+ * - Audio copied (not transferred) to retain main thread access for pre-speech buffer
+ * - ONNX Runtime loaded from CDN in worker (no bundler complications)
+ *
+ * @category Inference
+ *
+ * @example Basic usage
+ * ```typescript
+ * import { SileroVADWorker } from '@omote/core';
+ *
+ * const vad = new SileroVADWorker({
+ *   modelUrl: '/models/silero-vad.onnx'
+ * });
+ * await vad.load();
+ *
+ * // Process 32ms chunks (512 samples at 16kHz)
+ * const result = await vad.process(audioChunk);
+ * if (result.isSpeech) {
+ *   console.log('Speech detected!', result.probability);
+ * }
+ * ```
+ */
 /**
  * Configuration for Silero VAD Worker
  */
@@ -2091,25 +2122,7 @@ interface VADWorkerModelInfo {
     sampleRate: number;
     chunkSize: number;
 }
-/**
- * Result from a single VAD inference
- */
-interface VADResult {
-    /** Speech probability (0-1) */
-    probability: number;
-    /** Whether speech is detected (probability > threshold) */
-    isSpeech: boolean;
-    /** Inference time in milliseconds */
-    inferenceTimeMs: number;
-    /**
-     * Pre-speech audio chunks (only present on first speech detection).
-     * These are the N chunks immediately before VAD triggered, useful for
-     * capturing the beginning of speech that occurred before detection.
-     *
-     * Only populated when transitioning from silence to speech.
-     */
-    preSpeechChunks?: Float32Array[];
-}
 /**
  * Silero VAD Worker - Voice Activity Detection in a Web Worker
  *
@@ -2257,7 +2270,7 @@ interface SileroVADBackend {
      * @param audioChunk - Float32Array of exactly chunkSize samples
      * @returns VAD result with speech probability
      */
-    process(audioChunk: Float32Array): Promise<VADResult$1>;
+    process(audioChunk: Float32Array): Promise<VADResult>;
     /**
      * Reset state for new audio stream
      */
@@ -2791,10 +2804,7 @@ interface ConversationMessage {
     /** Audio duration if applicable (ms) */
     audioDurationMs?: number;
 }
-/**
- * Session state
- */
-type AISessionState = 'idle' | 'listening' | 'thinking' | 'speaking' | 'interrupted' | 'error' | 'disconnected';
 /**
  * Events emitted by AI adapters
  */
@@ -3106,7 +3116,6 @@ declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements
      * Falls back to simple RMS if VAD not available
      */
     private detectVoiceActivity;
-    private int16ToFloat32;
     private base64ToArrayBuffer;
     private addToHistory;
     private handleDisconnect;
@@ -4704,4 +4713,4 @@ declare class ProceduralLifeLayer {
     private updateBrowNoise;
 }
-export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, 
supportsVADWorker, symmetrizeBlendshapes };
+export { type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, 
symmetrizeBlendshapes };