@omote/core 0.3.25 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,6 @@
1
1
  import {
2
2
  EventEmitter
3
3
  } from "../chunk-XK22BRG4.mjs";
4
- import "../chunk-NSSMTXJJ.mjs";
5
4
  export {
6
5
  EventEmitter
7
6
  };
package/dist/index.d.mts CHANGED
@@ -863,133 +863,207 @@ declare function isOnnxRuntimeLoaded(): boolean;
863
863
  declare function preloadOnnxRuntime(preference?: BackendPreference): Promise<RuntimeBackend>;
864
864
 
865
865
  /**
866
- * Whisper Automatic Speech Recognition using transformers.js
867
- * Uses Xenova's proven pipeline API for reliable transcription
868
- */
869
- type WhisperModel = 'tiny' | 'base' | 'small' | 'medium';
870
- type WhisperDtype = 'fp32' | 'fp16' | 'q8' | 'int8' | 'uint8' | 'q4' | 'q4f16' | 'bnb4';
871
- interface WhisperConfig {
872
- /** Model size: tiny (~75MB), base (~150MB), small (~500MB), medium (~1.5GB) */
873
- model?: WhisperModel;
874
- /** Use multilingual model (default: false, uses .en models) */
875
- multilingual?: boolean;
876
- /** Language code (e.g., 'en', 'es', 'fr') - for multilingual models */
877
- language?: string;
878
- /** Task: transcribe or translate (default: transcribe) */
879
- task?: 'transcribe' | 'translate';
880
- /** Model quantization format (default: 'q8' for balance of speed/quality) */
881
- dtype?: WhisperDtype;
882
- /** Use WebGPU acceleration if available (default: auto-detect) */
883
- device?: 'auto' | 'webgpu' | 'wasm';
884
- /** Local model path (e.g., '/models/whisper-tiny.en') - overrides HuggingFace CDN */
885
- localModelPath?: string;
886
- /** HuggingFace API token to bypass rate limits (get from https://huggingface.co/settings/tokens) */
887
- token?: string;
888
- /** Suppress non-speech tokens like [LAUGHTER], [CLICKING], etc. (default: true) */
889
- suppressNonSpeech?: boolean;
866
+ * SenseVoice automatic speech recognition using ONNX Runtime Web
867
+ *
868
+ * Non-autoregressive CTC-based ASR that is 5x faster than Whisper-Small.
869
+ * Runs entirely in browser via WebGPU or WASM. No transformers.js dependency.
870
+ *
871
+ * Uses the sherpa-onnx SenseVoice export (model.int8.onnx, 239MB int8 quantized).
872
+ * Also provides emotion detection, language identification, and audio event detection
873
+ * from the same forward pass.
874
+ *
875
+ * @category Inference
876
+ *
877
+ * @example Basic usage
878
+ * ```typescript
879
+ * import { SenseVoiceInference } from '@omote/core';
880
+ *
881
+ * const asr = new SenseVoiceInference({
882
+ * modelUrl: '/models/sensevoice/model.int8.onnx',
883
+ * tokensUrl: '/models/sensevoice/tokens.txt',
884
+ * });
885
+ * await asr.load();
886
+ *
887
+ * const { text, emotion, language } = await asr.transcribe(audioSamples);
888
+ * console.log(text); // "Hello world"
889
+ * console.log(emotion); // "NEUTRAL"
890
+ * console.log(language); // "en"
891
+ * ```
892
+ *
893
+ * @module inference/SenseVoiceInference
894
+ */
895
+
896
+ type SenseVoiceLanguage = 'auto' | 'zh' | 'en' | 'ja' | 'ko' | 'yue';
897
+ interface SenseVoiceConfig {
898
+ /** Path or URL to model.int8.onnx (239MB) */
899
+ modelUrl: string;
900
+ /** Path or URL to tokens.txt vocabulary file (default: sibling of modelUrl) */
901
+ tokensUrl?: string;
902
+ /** Language hint (default: 'auto' for auto-detection) */
903
+ language?: SenseVoiceLanguage;
904
+ /** Text normalization: 'with_itn' applies inverse text normalization (default: 'with_itn') */
905
+ textNorm?: 'with_itn' | 'without_itn';
906
+ /** Preferred backend (default: 'auto') */
907
+ backend?: BackendPreference;
890
908
  }
891
- interface TranscriptionResult {
909
+ interface SenseVoiceResult {
892
910
  /** Transcribed text */
893
911
  text: string;
894
- /** Detected/used language */
895
- language: string;
896
- /** Inference time in ms */
912
+ /** Detected language (e.g., 'zh', 'en', 'ja', 'ko', 'yue') */
913
+ language?: string;
914
+ /** Detected emotion (e.g., 'HAPPY', 'SAD', 'ANGRY', 'NEUTRAL') */
915
+ emotion?: string;
916
+ /** Detected audio event (e.g., 'Speech', 'BGM', 'Laughter') */
917
+ event?: string;
918
+ /** Inference time in milliseconds (preprocessing + model + decode) */
897
919
  inferenceTimeMs: number;
898
- /** Full chunks with timestamps (if requested) */
899
- chunks?: Array<{
900
- text: string;
901
- timestamp: [number, number | null];
902
- }>;
920
+ /** Preprocessing time in milliseconds (fbank + LFR + CMVN) */
921
+ preprocessTimeMs: number;
903
922
  }
904
- /**
905
- * Whisper ASR inference using transformers.js pipeline API
906
- *
907
- * Features:
908
- * - Automatic WebGPU/WASM backend selection
909
- * - Streaming support with chunk callbacks
910
- * - Proven implementation from Xenova's demo
911
- * - Handles all audio preprocessing automatically
912
- */
913
- declare class WhisperInference {
923
+ interface SenseVoiceModelInfo {
924
+ backend: RuntimeBackend;
925
+ loadTimeMs: number;
926
+ inputNames: string[];
927
+ outputNames: string[];
928
+ vocabSize: number;
929
+ }
930
+ declare class SenseVoiceInference {
931
+ private session;
932
+ private ort;
914
933
  private config;
915
- private pipeline;
916
- private currentModel;
934
+ private _backend;
917
935
  private isLoading;
918
- private actualBackend;
919
- constructor(config?: WhisperConfig);
920
- /**
921
- * Check if WebGPU is available in this browser
922
- */
923
- static isWebGPUAvailable(): Promise<boolean>;
924
- /**
925
- * Load the Whisper model pipeline
926
- */
927
- load(onProgress?: (progress: {
928
- status: string;
929
- progress?: number;
930
- file?: string;
931
- }) => void): Promise<void>;
932
- /**
933
- * Transcribe audio to text
934
- *
935
- * @param audio Audio samples (Float32Array, 16kHz mono)
936
- * @param options Transcription options
937
- */
938
- transcribe(audio: Float32Array, options?: {
939
- /** Return timestamps for each chunk */
940
- returnTimestamps?: boolean;
941
- /** Chunk length in seconds (default: 30) */
942
- chunkLengthS?: number;
943
- /** Stride length in seconds for overlapping chunks (default: 5) */
944
- strideLengthS?: number;
945
- /** Language override */
946
- language?: string;
947
- /** Task override */
948
- task?: 'transcribe' | 'translate';
949
- }): Promise<TranscriptionResult>;
950
- /**
951
- * Transcribe with streaming chunks (progressive results)
952
- *
953
- * @param audio Audio samples
954
- * @param onChunk Called when each chunk is finalized
955
- * @param onUpdate Called after each generation step (optional)
956
- */
957
- transcribeStreaming(audio: Float32Array, onChunk: (chunk: {
958
- text: string;
959
- timestamp: [number, number | null];
960
- }) => void, onUpdate?: (text: string) => void, options?: {
961
- chunkLengthS?: number;
962
- strideLengthS?: number;
963
- language?: string;
964
- task?: 'transcribe' | 'translate';
965
- }): Promise<TranscriptionResult>;
966
- /**
967
- * Dispose of the model and free resources
968
- */
969
- dispose(): Promise<void>;
970
- /**
971
- * Check if model is loaded
972
- */
936
+ private inferenceQueue;
937
+ private tokenMap;
938
+ private negMean;
939
+ private invStddev;
940
+ private languageId;
941
+ private textNormId;
942
+ constructor(config: SenseVoiceConfig);
943
+ get backend(): RuntimeBackend | null;
973
944
  get isLoaded(): boolean;
945
+ load(onProgress?: (loaded: number, total: number) => void): Promise<SenseVoiceModelInfo>;
974
946
  /**
975
- * Get the backend being used (webgpu or wasm)
976
- */
977
- get backend(): string;
978
- /**
979
- * Get the full model name used by transformers.js
980
- */
981
- private getModelName;
982
- /**
983
- * Remove non-speech event tokens from transcription
984
- *
985
- * Whisper outputs special tokens for non-speech events like:
986
- * [LAUGHTER], [APPLAUSE], [MUSIC], [BLANK_AUDIO], [CLICKING], etc.
947
+ * Transcribe audio samples to text
987
948
  *
988
- * This method strips these tokens and cleans up extra whitespace.
949
+ * @param audioSamples Float32Array of audio samples at 16kHz, [-1, 1] range
950
+ * @returns Transcription result with text, emotion, language, and event
989
951
  */
990
- private removeNonSpeechTokens;
952
+ transcribe(audioSamples: Float32Array): Promise<SenseVoiceResult>;
953
+ private queueInference;
954
+ dispose(): Promise<void>;
991
955
  }
992
956
 
957
+ /**
958
+ * Kaldi-compatible filterbank (fbank) feature extraction
959
+ *
960
+ * Pure TypeScript implementation matching kaldi-native-fbank parameters
961
+ * used by SenseVoice. No external dependencies.
962
+ *
963
+ * Pipeline: audio → framing → windowing → FFT → power spectrum → mel filterbank → log
964
+ *
965
+ * @module inference/kaldiFbank
966
+ */
967
+ interface KaldiFbankOptions {
968
+ /** Frame length in ms (default: 25) */
969
+ frameLengthMs?: number;
970
+ /** Frame shift in ms (default: 10) */
971
+ frameShiftMs?: number;
972
+ /** Low frequency cutoff in Hz (default: 20) */
973
+ lowFreq?: number;
974
+ /** High frequency cutoff in Hz (default: sampleRate / 2) */
975
+ highFreq?: number;
976
+ /** Dither amount (default: 0 for deterministic output) */
977
+ dither?: number;
978
+ /** Preemphasis coefficient (default: 0.97) */
979
+ preemphasis?: number;
980
+ }
981
+ /**
982
+ * Compute Kaldi-compatible log mel filterbank features
983
+ *
984
+ * @param audio Raw audio samples (float32, [-1, 1] range)
985
+ * @param sampleRate Sample rate in Hz (must be 16000 for SenseVoice)
986
+ * @param numMelBins Number of mel bins (80 for SenseVoice)
987
+ * @param opts Optional parameters
988
+ * @returns Flattened Float32Array of shape [numFrames, numMelBins]
989
+ */
990
+ declare function computeKaldiFbank(audio: Float32Array, sampleRate: number, numMelBins: number, opts?: KaldiFbankOptions): Float32Array;
991
+ /**
992
+ * Apply Low Frame Rate stacking for SenseVoice
993
+ *
994
+ * Concatenates lfrM consecutive frames with stride lfrN.
995
+ * Left-pads with copies of first frame, right-pads last group.
996
+ *
997
+ * @param features Flattened [numFrames, featureDim]
998
+ * @param featureDim Feature dimension per frame (e.g., 80)
999
+ * @param lfrM Number of frames to stack (default: 7)
1000
+ * @param lfrN Stride (default: 6)
1001
+ * @returns Flattened [numOutputFrames, featureDim * lfrM]
1002
+ */
1003
+ declare function applyLFR(features: Float32Array, featureDim: number, lfrM?: number, lfrN?: number): Float32Array;
1004
+ /**
1005
+ * Apply CMVN normalization in-place
1006
+ *
1007
+ * Formula: normalized[i] = (features[i] + negMean[i % dim]) * invStddev[i % dim]
1008
+ *
1009
+ * @param features Flattened feature array (modified in-place)
1010
+ * @param dim Feature dimension (560 for SenseVoice after LFR)
1011
+ * @param negMean Negative mean vector (dim-dimensional)
1012
+ * @param invStddev Inverse standard deviation vector (dim-dimensional)
1013
+ * @returns The same features array (for chaining)
1014
+ */
1015
+ declare function applyCMVN(features: Float32Array, dim: number, negMean: Float32Array, invStddev: Float32Array): Float32Array;
1016
+ /**
1017
+ * Parse CMVN vectors from comma-separated strings (stored in ONNX metadata)
1018
+ *
1019
+ * The sherpa-onnx SenseVoice export stores neg_mean and inv_stddev
1020
+ * as comma-separated float strings in the model's metadata.
1021
+ */
1022
+ declare function parseCMVNFromMetadata(negMeanStr: string, invStddevStr: string): {
1023
+ negMean: Float32Array;
1024
+ invStddev: Float32Array;
1025
+ };
1026
+
1027
+ /**
1028
+ * CTC greedy decoder for SenseVoice
1029
+ *
1030
+ * Decodes CTC logits into text with structured token parsing
1031
+ * for language, emotion, and audio event detection.
1032
+ *
1033
+ * @module inference/ctcDecoder
1034
+ */
1035
+ interface CTCDecodeResult {
1036
+ /** Decoded text (speech content only) */
1037
+ text: string;
1038
+ /** Detected language (e.g., 'zh', 'en', 'ja', 'ko', 'yue') */
1039
+ language?: string;
1040
+ /** Detected emotion (e.g., 'HAPPY', 'SAD', 'ANGRY', 'NEUTRAL') */
1041
+ emotion?: string;
1042
+ /** Detected audio event (e.g., 'Speech', 'BGM', 'Laughter') */
1043
+ event?: string;
1044
+ }
1045
+ /** Resolve language string to SenseVoice language ID */
1046
+ declare function resolveLanguageId(language: string): number;
1047
+ /** Resolve text norm string to SenseVoice text norm ID */
1048
+ declare function resolveTextNormId(textNorm: string): number;
1049
+ /**
1050
+ * Parse tokens.txt into a token ID → string map
1051
+ *
1052
+ * Format: each line is "token_string token_id"
1053
+ * e.g., "<unk> 0", "▁the 3", "s 4"
1054
+ */
1055
+ declare function parseTokensFile(content: string): Map<number, string>;
1056
+ /**
1057
+ * CTC greedy decode
1058
+ *
1059
+ * @param logits Raw logits from model output, flattened [seqLen, vocabSize]
1060
+ * @param seqLen Sequence length (time steps)
1061
+ * @param vocabSize Vocabulary size
1062
+ * @param tokenMap Token ID → string map from tokens.txt
1063
+ * @returns Decoded text and structured metadata
1064
+ */
1065
+ declare function ctcGreedyDecode(logits: Float32Array, seqLen: number, vocabSize: number, tokenMap: Map<number, string>): CTCDecodeResult;
1066
+
993
1067
  /**
994
1068
  * Shared blendshape constants and utilities for lip sync inference
995
1069
  *
@@ -1234,6 +1308,16 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
1234
1308
  * - Chrome/Firefox/Edge: Uses Wav2Vec2Inference (384MB, WebGPU)
1235
1309
  * - Fallback: Gracefully falls back to CPU model if GPU model fails to load
1236
1310
  *
1311
+ * Why two separate models?
1312
+ * Wav2Vec2 (LAM) cannot run on Safari/iOS for two reasons:
1313
+ * 1. Its dual-head transformer graph needs ~750-950MB peak during ORT session
1314
+ * creation (graph optimization), exceeding iOS WebKit's ~1-1.5GB tab limit.
1315
+ * 2. It ships as a single 384MB .onnx file that must load into JS heap before
1316
+ * ORT can consume it. iOS WebKit OOMs on this allocation.
1317
+ * wav2arkit_cpu solves both: external data format (1.86MB graph + 402MB weights)
1318
+ * lets ORT load only the tiny graph, then stream weights via URL pass-through
1319
+ * directly into WASM memory. JS heap stays at ~2MB.
1320
+ *
1237
1321
  * @category Inference
1238
1322
  *
1239
1323
  * @example Auto-detect (recommended)
@@ -2485,7 +2569,7 @@ declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements
2485
2569
  private _state;
2486
2570
  private _sessionId;
2487
2571
  private _isConnected;
2488
- private whisper;
2572
+ private asr;
2489
2573
  private vad;
2490
2574
  private lam;
2491
2575
  private emotionController;
@@ -2529,7 +2613,7 @@ declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements
2529
2613
  healthCheck(): Promise<boolean>;
2530
2614
  private setState;
2531
2615
  private getAuthToken;
2532
- private initWhisper;
2616
+ private initASR;
2533
2617
  private initLAM;
2534
2618
  private initPipeline;
2535
2619
  private connectWebSocket;
@@ -3223,148 +3307,6 @@ declare function preloadModels(urls: string[], onProgress?: (current: number, to
3223
3307
  */
3224
3308
  declare function formatBytes(bytes: number): string;
3225
3309
 
3226
- /**
3227
- * HuggingFace CDN Utilities
3228
- *
3229
- * Helper functions for working with HuggingFace CDN URLs.
3230
- * Used by transformers.js models (Whisper, etc.) for model downloads.
3231
- *
3232
- * @category Cache
3233
- */
3234
- /**
3235
- * Test URL for HuggingFace CDN reachability check.
3236
- * Uses a small, stable file from a well-known public model.
3237
- */
3238
- declare const HF_CDN_TEST_URL = "https://huggingface.co/Xenova/whisper-tiny/resolve/main/config.json";
3239
- /**
3240
- * Parsed HuggingFace URL components
3241
- */
3242
- interface HuggingFaceUrlInfo {
3243
- /** Organization or username */
3244
- org: string;
3245
- /** Model name */
3246
- model: string;
3247
- /** Branch, tag, or commit */
3248
- branch: string;
3249
- /** File path within the repository */
3250
- file: string;
3251
- }
3252
- /**
3253
- * Parse a HuggingFace CDN URL into its components
3254
- *
3255
- * @param url - The HuggingFace URL to parse
3256
- * @returns Parsed URL info or null if not a valid HF URL
3257
- *
3258
- * @example
3259
- * ```typescript
3260
- * const info = parseHuggingFaceUrl(
3261
- * 'https://huggingface.co/openai/whisper-tiny/resolve/main/model.onnx'
3262
- * );
3263
- * // Returns: { org: 'openai', model: 'whisper-tiny', branch: 'main', file: 'model.onnx' }
3264
- * ```
3265
- */
3266
- declare function parseHuggingFaceUrl(url: string): HuggingFaceUrlInfo | null;
3267
- /**
3268
- * Check if HuggingFace CDN is reachable
3269
- *
3270
- * Performs a HEAD request to a known HuggingFace model file to verify
3271
- * connectivity. Useful for offline detection or network diagnostics.
3272
- *
3273
- * @param testUrl - Optional custom URL to test (defaults to HF_CDN_TEST_URL)
3274
- * @returns True if CDN is reachable, false otherwise
3275
- *
3276
- * @example
3277
- * ```typescript
3278
- * import { isHuggingFaceCDNReachable } from '@omote/core';
3279
- *
3280
- * const reachable = await isHuggingFaceCDNReachable();
3281
- * if (!reachable) {
3282
- * console.log('HuggingFace CDN unreachable - running offline?');
3283
- * // Fall back to cached models or show error
3284
- * }
3285
- * ```
3286
- */
3287
- declare function isHuggingFaceCDNReachable(testUrl?: string): Promise<boolean>;
3288
-
3289
- /**
3290
- * Utility to clear transformers.js Cache API storage
3291
- *
3292
- * Problem: transformers.js v4 uses Browser Cache API which persists across hard refreshes.
3293
- * If an HTML error page gets cached (due to network errors, CDN issues, or dev server restarts),
3294
- * it will be served instead of JSON files, causing JSON.parse() errors.
3295
- *
3296
- * Solution: Manually clear Cache API storage before loading models.
3297
- *
3298
- * @module utils/transformersCacheClear
3299
- */
3300
- /**
3301
- * Clear all transformers.js and HuggingFace caches from Browser Cache API
3302
- *
3303
- * This clears:
3304
- * - transformers-cache (default cache key)
3305
- * - Any caches with 'transformers' or 'huggingface' in the name
3306
- *
3307
- * @param options Configuration options
3308
- * @returns Promise resolving to array of deleted cache names
3309
- */
3310
- declare function clearTransformersCache(options?: {
3311
- /** Whether to log deletion details (default: true) */
3312
- verbose?: boolean;
3313
- /** Additional cache name patterns to clear (e.g., ['my-custom-cache']) */
3314
- additionalPatterns?: string[];
3315
- }): Promise<string[]>;
3316
- /**
3317
- * Clear a specific cache by exact name
3318
- *
3319
- * @param cacheName Exact cache name to delete
3320
- * @returns Promise resolving to true if deleted, false otherwise
3321
- */
3322
- declare function clearSpecificCache(cacheName: string): Promise<boolean>;
3323
- /**
3324
- * List all cache names currently stored
3325
- *
3326
- * @returns Promise resolving to array of cache names
3327
- */
3328
- declare function listCaches(): Promise<string[]>;
3329
- /**
3330
- * Check if a specific cached response is valid JSON/binary (not HTML error page)
3331
- *
3332
- * @param cacheName Cache name to check
3333
- * @param requestUrl URL/key to check
3334
- * @returns Promise resolving to validation result
3335
- */
3336
- declare function validateCachedResponse(cacheName: string, requestUrl: string): Promise<{
3337
- exists: boolean;
3338
- valid: boolean;
3339
- contentType: string | null;
3340
- isHtml: boolean;
3341
- reason?: string;
3342
- }>;
3343
- /**
3344
- * Scan all caches for potentially invalid cached responses
3345
- *
3346
- * @returns Promise resolving to report of invalid entries
3347
- */
3348
- declare function scanForInvalidCaches(): Promise<{
3349
- totalCaches: number;
3350
- scannedEntries: number;
3351
- invalidEntries: Array<{
3352
- cacheName: string;
3353
- url: string;
3354
- reason: string;
3355
- }>;
3356
- }>;
3357
- /**
3358
- * Clear all caches and optionally prevent re-creation (development mode)
3359
- *
3360
- * WARNING: This is aggressive and should only be used in development.
3361
- * It clears ALL browser caches, not just transformers.js.
3362
- *
3363
- * @param preventRecreation If true, sets env.useBrowserCache = false
3364
- * @returns Promise resolving to number of deleted caches
3365
- */
3366
- declare function nukeBrowserCaches(preventRecreation?: boolean): Promise<number>;
3367
-
3368
3310
  /**
3369
3311
  * Telemetry Types
3370
3312
  *
@@ -4127,4 +4069,4 @@ declare class EmphasisDetector {
4127
4069
  reset(): void;
4128
4070
  }
4129
4071
 
4130
- export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, scanForInvalidCaches, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes, validateCachedResponse };
4072
+ export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes };