@omote/core 0.3.1 → 0.3.25
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-T465MTDX.mjs → chunk-B6TIE56N.mjs} +63 -1153
- package/dist/chunk-B6TIE56N.mjs.map +1 -0
- package/dist/events/index.mjs +1 -1
- package/dist/index.d.mts +86 -45
- package/dist/index.d.ts +86 -45
- package/dist/index.js +313 -1428
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +241 -124
- package/dist/index.mjs.map +1 -1
- package/dist/logging/index.mjs +1 -1
- package/dist/{transformers.web-MHLR33H6.mjs → transformers.web-T5LWC34T.mjs} +3 -3
- package/package.json +2 -3
- package/dist/chunk-6W7G6WE7.mjs +0 -13
- package/dist/chunk-C3Y37HKD.mjs +0 -26378
- package/dist/chunk-C3Y37HKD.mjs.map +0 -1
- package/dist/chunk-RI6UQ7WF.mjs +0 -26378
- package/dist/chunk-RI6UQ7WF.mjs.map +0 -1
- package/dist/chunk-T465MTDX.mjs.map +0 -1
- package/dist/transformers.web-4C62MDO6.mjs +0 -1724
- package/dist/transformers.web-4C62MDO6.mjs.map +0 -1
- package/dist/transformers.web-ALDLCPHT.mjs +0 -1725
- package/dist/transformers.web-ALDLCPHT.mjs.map +0 -1
- package/dist/transformers.web-MHLR33H6.mjs.map +0 -1
- package/dist/{chunk-6W7G6WE7.mjs.map → transformers.web-T5LWC34T.mjs.map} +0 -0
package/dist/events/index.mjs
CHANGED
package/dist/index.d.mts
CHANGED
|
@@ -27,11 +27,19 @@ declare class MicrophoneCapture {
|
|
|
27
27
|
private buffer;
|
|
28
28
|
private _isRecording;
|
|
29
29
|
private _loggedFirstChunk;
|
|
30
|
+
/** Actual AudioContext sample rate (may differ from target on Firefox) */
|
|
31
|
+
private _nativeSampleRate;
|
|
30
32
|
constructor(events: EventEmitter<OmoteEvents>, config?: MicrophoneCaptureConfig);
|
|
31
33
|
get isRecording(): boolean;
|
|
32
34
|
get isSupported(): boolean;
|
|
33
35
|
start(): Promise<void>;
|
|
34
36
|
stop(): void;
|
|
37
|
+
/**
|
|
38
|
+
* Resample audio using linear interpolation.
|
|
39
|
+
* Used when the AudioContext runs at the device's native rate (e.g. 48kHz)
|
|
40
|
+
* and we need to downsample to the target rate (e.g. 16kHz).
|
|
41
|
+
*/
|
|
42
|
+
private resample;
|
|
35
43
|
private floatToPCM16;
|
|
36
44
|
}
|
|
37
45
|
|
|
@@ -98,12 +106,11 @@ interface AudioSchedulerOptions {
|
|
|
98
106
|
/** Number of audio channels (default: 1 for mono) */
|
|
99
107
|
channels?: number;
|
|
100
108
|
/**
|
|
101
|
-
*
|
|
102
|
-
* Gives
|
|
103
|
-
*
|
|
104
|
-
* Default: 0.05 (50ms — just enough to enqueue the first node)
|
|
109
|
+
* Initial lookahead delay in seconds before first audio plays.
|
|
110
|
+
* Gives LAM inference time to compute blendshapes before audio starts.
|
|
111
|
+
* Default: 0.05 (50ms) for WebGPU, increase to 0.3-0.5 for WASM on iOS.
|
|
105
112
|
*/
|
|
106
|
-
|
|
113
|
+
initialLookaheadSec?: number;
|
|
107
114
|
}
|
|
108
115
|
declare class AudioScheduler {
|
|
109
116
|
private readonly options;
|
|
@@ -373,13 +380,14 @@ declare function isSafari(): boolean;
|
|
|
373
380
|
/**
|
|
374
381
|
* Recommend using CPU-optimized lip sync model (wav2arkit_cpu)
|
|
375
382
|
*
|
|
376
|
-
* All
|
|
377
|
-
*
|
|
378
|
-
*
|
|
379
|
-
*
|
|
380
|
-
*
|
|
383
|
+
* All iOS browsers use WebKit and have tight memory limits — the 384MB
|
|
384
|
+
* LAM model causes silent crashes. wav2arkit_cpu uses URL pass-through
|
|
385
|
+
* (ORT fetches the 402MB weights directly into WASM, no JS heap copy).
|
|
386
|
+
*
|
|
387
|
+
* macOS Safari also needs this due to ONNX Runtime JSEP/ASYNCIFY bugs
|
|
388
|
+
* that crash WebKit's JIT compiler.
|
|
381
389
|
*
|
|
382
|
-
* @returns true if
|
|
390
|
+
* @returns true if iOS (any browser) or Safari (any platform)
|
|
383
391
|
*/
|
|
384
392
|
declare function shouldUseCpuLipSync(): boolean;
|
|
385
393
|
/**
|
|
@@ -400,7 +408,7 @@ declare function isSpeechRecognitionAvailable(): boolean;
|
|
|
400
408
|
* - Battery-efficient (no WASM overhead)
|
|
401
409
|
* - No model download needed (saves 30-150MB)
|
|
402
410
|
*
|
|
403
|
-
* @returns true if on iOS with Speech API available
|
|
411
|
+
* @returns true if on iOS or Safari with Speech API available
|
|
404
412
|
*/
|
|
405
413
|
declare function shouldUseNativeASR(): boolean;
|
|
406
414
|
/**
|
|
@@ -419,7 +427,7 @@ declare function shouldUseServerLipSync(): boolean;
|
|
|
419
427
|
/**
|
|
420
428
|
* Common interface for lip sync inference backends
|
|
421
429
|
*
|
|
422
|
-
* Both Wav2Vec2Inference (GPU, 384MB) and Wav2ArkitCpuInference (CPU,
|
|
430
|
+
* Both Wav2Vec2Inference (GPU, 384MB) and Wav2ArkitCpuInference (CPU, 404MB)
|
|
423
431
|
* implement this interface, allowing SyncedAudioPipeline and LAMPipeline to
|
|
424
432
|
* work with either model transparently.
|
|
425
433
|
*
|
|
@@ -454,19 +462,15 @@ interface LipSyncResult {
|
|
|
454
462
|
*
|
|
455
463
|
* Implemented by:
|
|
456
464
|
* - Wav2Vec2Inference (WebGPU/WASM, 384MB, ASR + lip sync)
|
|
457
|
-
* - Wav2ArkitCpuInference (WASM-only,
|
|
465
|
+
* - Wav2ArkitCpuInference (WASM-only, 404MB, lip sync only)
|
|
458
466
|
*/
|
|
459
467
|
interface LipSyncBackend {
|
|
468
|
+
/** Model identifier for backend-specific tuning (e.g. audio delay) */
|
|
469
|
+
readonly modelId: 'wav2vec2' | 'wav2arkit_cpu';
|
|
460
470
|
/** Current backend type (webgpu, wasm, or null if not loaded) */
|
|
461
471
|
readonly backend: RuntimeBackend | null;
|
|
462
472
|
/** Whether the model is loaded and ready for inference */
|
|
463
473
|
readonly isLoaded: boolean;
|
|
464
|
-
/**
|
|
465
|
-
* Preferred number of audio samples per inference chunk.
|
|
466
|
-
* Models with variable-length input can use smaller values for lower latency.
|
|
467
|
-
* Default (if undefined): 16000 (1.0s at 16kHz, required by Wav2Vec2).
|
|
468
|
-
*/
|
|
469
|
-
readonly chunkSamples?: number;
|
|
470
474
|
/**
|
|
471
475
|
* Load the ONNX model
|
|
472
476
|
* @returns Model loading information
|
|
@@ -529,7 +533,7 @@ interface LAMPipelineOptions {
|
|
|
529
533
|
}
|
|
530
534
|
declare class LAMPipeline {
|
|
531
535
|
private readonly options;
|
|
532
|
-
private readonly
|
|
536
|
+
private readonly REQUIRED_SAMPLES;
|
|
533
537
|
private readonly FRAME_RATE;
|
|
534
538
|
private buffer;
|
|
535
539
|
private bufferStartTime;
|
|
@@ -558,13 +562,15 @@ declare class LAMPipeline {
|
|
|
558
562
|
/**
|
|
559
563
|
* Get the frame that should be displayed at the current time
|
|
560
564
|
*
|
|
561
|
-
*
|
|
562
|
-
*
|
|
563
|
-
*
|
|
565
|
+
* Automatically removes frames that have already been displayed.
|
|
566
|
+
* This prevents memory leaks from accumulating old frames.
|
|
567
|
+
*
|
|
568
|
+
* Discard Window (prevents premature frame discarding):
|
|
569
|
+
* - WebGPU: 0.5s (LAM inference 20-100ms + RAF jitter + React stalls)
|
|
570
|
+
* - WASM: 1.0s (LAM inference 50-500ms + higher variability)
|
|
564
571
|
*
|
|
565
|
-
*
|
|
566
|
-
*
|
|
567
|
-
* to natural 30fps pacing via timestamp gating.
|
|
572
|
+
* Last-Frame-Hold: Returns last valid frame instead of null to prevent
|
|
573
|
+
* avatar freezing when between frames (RAF at 60fps vs LAM at 30fps).
|
|
568
574
|
*
|
|
569
575
|
* @param currentTime - Current AudioContext time
|
|
570
576
|
* @param lam - LAM inference engine (optional, for backend detection)
|
|
@@ -592,7 +598,7 @@ declare class LAMPipeline {
|
|
|
592
598
|
/**
|
|
593
599
|
* Flush remaining buffered audio
|
|
594
600
|
*
|
|
595
|
-
* Processes any remaining audio in the buffer, even if less than
|
|
601
|
+
* Processes any remaining audio in the buffer, even if less than REQUIRED_SAMPLES.
|
|
596
602
|
* This ensures the final audio chunk generates blendshape frames.
|
|
597
603
|
*
|
|
598
604
|
* Should be called when audio stream ends to prevent losing the last 0-1 seconds.
|
|
@@ -645,6 +651,12 @@ interface SyncedAudioPipelineOptions {
|
|
|
645
651
|
chunkTargetMs?: number;
|
|
646
652
|
/** LAM inference engine */
|
|
647
653
|
lam: LipSyncBackend;
|
|
654
|
+
/**
|
|
655
|
+
* Audio playback delay in ms before first audio plays.
|
|
656
|
+
* Gives LAM inference time to pre-compute blendshapes.
|
|
657
|
+
* Default: auto-detected from lam.backend (50ms WebGPU, 350ms WASM).
|
|
658
|
+
*/
|
|
659
|
+
audioDelayMs?: number;
|
|
648
660
|
}
|
|
649
661
|
interface SyncedAudioPipelineEvents {
|
|
650
662
|
/** New frame ready for display */
|
|
@@ -838,6 +850,17 @@ declare function getLoadedBackend(): RuntimeBackend | null;
|
|
|
838
850
|
* Check if ONNX Runtime has been loaded
|
|
839
851
|
*/
|
|
840
852
|
declare function isOnnxRuntimeLoaded(): boolean;
|
|
853
|
+
/**
|
|
854
|
+
* Preload ONNX Runtime and compile the WASM binary early
|
|
855
|
+
*
|
|
856
|
+
* Call this before loading heavy resources (Three.js, VRM models) to ensure
|
|
857
|
+
* WASM memory is allocated in a clean JS heap, reducing iOS memory pressure.
|
|
858
|
+
* Uses the singleton pattern — subsequent model loading reuses this instance.
|
|
859
|
+
*
|
|
860
|
+
* @param preference Backend preference (default: 'auto')
|
|
861
|
+
* @returns The resolved backend that was loaded
|
|
862
|
+
*/
|
|
863
|
+
declare function preloadOnnxRuntime(preference?: BackendPreference): Promise<RuntimeBackend>;
|
|
841
864
|
|
|
842
865
|
/**
|
|
843
866
|
* Whisper Automatic Speech Recognition using transformers.js
|
|
@@ -1036,6 +1059,13 @@ type InferenceBackend = BackendPreference;
|
|
|
1036
1059
|
interface Wav2Vec2InferenceConfig {
|
|
1037
1060
|
/** Path or URL to the ONNX model */
|
|
1038
1061
|
modelUrl: string;
|
|
1062
|
+
/**
|
|
1063
|
+
* Path or URL to external model data file (.onnx.data weights).
|
|
1064
|
+
* Default: `${modelUrl}.data` (e.g., /models/model.onnx.data)
|
|
1065
|
+
*
|
|
1066
|
+
* Set to `false` to skip external data loading (single-file models only).
|
|
1067
|
+
*/
|
|
1068
|
+
externalDataUrl?: string | false;
|
|
1039
1069
|
/** Preferred backend (auto will try WebGPU first, fallback to WASM) */
|
|
1040
1070
|
backend?: InferenceBackend;
|
|
1041
1071
|
/** Number of identity classes (default: 12 for streaming model) */
|
|
@@ -1066,7 +1096,8 @@ interface Wav2Vec2Result {
|
|
|
1066
1096
|
/** Inference time in ms */
|
|
1067
1097
|
inferenceTimeMs: number;
|
|
1068
1098
|
}
|
|
1069
|
-
declare class Wav2Vec2Inference {
|
|
1099
|
+
declare class Wav2Vec2Inference implements LipSyncBackend {
|
|
1100
|
+
readonly modelId: "wav2vec2";
|
|
1070
1101
|
private session;
|
|
1071
1102
|
private ort;
|
|
1072
1103
|
private config;
|
|
@@ -1116,12 +1147,16 @@ declare class Wav2Vec2Inference {
|
|
|
1116
1147
|
/**
|
|
1117
1148
|
* CPU-optimized lip sync inference using wav2arkit_cpu model
|
|
1118
1149
|
*
|
|
1119
|
-
* A
|
|
1120
|
-
* for
|
|
1150
|
+
* A Safari/iOS-compatible alternative to Wav2Vec2Inference (384MB) designed
|
|
1151
|
+
* for platforms where WebGPU crashes due to ONNX Runtime JSEP bugs.
|
|
1152
|
+
*
|
|
1153
|
+
* The model uses ONNX external data format:
|
|
1154
|
+
* - wav2arkit_cpu.onnx (1.86MB graph structure)
|
|
1155
|
+
* - wav2arkit_cpu.onnx.data (402MB weights)
|
|
1156
|
+
* Both files are fetched and cached automatically.
|
|
1121
1157
|
*
|
|
1122
1158
|
* Key differences from Wav2Vec2Inference:
|
|
1123
|
-
* - WASM-only backend (CPU-optimized, no WebGPU)
|
|
1124
|
-
* - 1.8MB model vs 384MB
|
|
1159
|
+
* - WASM-only backend (CPU-optimized, single-threaded, no WebGPU)
|
|
1125
1160
|
* - No identity input (baked to identity 11)
|
|
1126
1161
|
* - No ASR output (lip sync only)
|
|
1127
1162
|
* - Dynamic input length (not fixed to 16000 samples)
|
|
@@ -1146,12 +1181,18 @@ declare class Wav2Vec2Inference {
|
|
|
1146
1181
|
interface Wav2ArkitCpuConfig {
|
|
1147
1182
|
/** Path or URL to the wav2arkit_cpu ONNX model (.onnx graph file) */
|
|
1148
1183
|
modelUrl: string;
|
|
1149
|
-
/**
|
|
1150
|
-
|
|
1184
|
+
/**
|
|
1185
|
+
* Path or URL to external model data file (.onnx.data weights).
|
|
1186
|
+
* Default: `${modelUrl}.data` (e.g., /models/wav2arkit_cpu.onnx.data)
|
|
1187
|
+
*
|
|
1188
|
+
* Set to `false` to skip external data loading (single-file models only).
|
|
1189
|
+
*/
|
|
1190
|
+
externalDataUrl?: string | false;
|
|
1151
1191
|
/** Preferred backend (default: 'wasm' — this model is CPU-optimized) */
|
|
1152
1192
|
backend?: BackendPreference;
|
|
1153
1193
|
}
|
|
1154
1194
|
declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
1195
|
+
readonly modelId: "wav2arkit_cpu";
|
|
1155
1196
|
private session;
|
|
1156
1197
|
private ort;
|
|
1157
1198
|
private config;
|
|
@@ -1161,12 +1202,6 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1161
1202
|
constructor(config: Wav2ArkitCpuConfig);
|
|
1162
1203
|
get backend(): RuntimeBackend | null;
|
|
1163
1204
|
get isLoaded(): boolean;
|
|
1164
|
-
/**
|
|
1165
|
-
* Preferred chunk size: 4000 samples (250ms at 16kHz).
|
|
1166
|
-
* wav2arkit_cpu accepts variable-length input, so we use smaller chunks
|
|
1167
|
-
* for lower latency on WASM (vs 16000 for Wav2Vec2's fixed requirement).
|
|
1168
|
-
*/
|
|
1169
|
-
readonly chunkSamples = 4000;
|
|
1170
1205
|
/**
|
|
1171
1206
|
* Load the ONNX model
|
|
1172
1207
|
*/
|
|
@@ -1195,7 +1230,7 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1195
1230
|
* Factory function for lip sync with automatic GPU/CPU model selection
|
|
1196
1231
|
*
|
|
1197
1232
|
* Provides a unified API that automatically selects the optimal model:
|
|
1198
|
-
* - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (
|
|
1233
|
+
* - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (404MB, WASM)
|
|
1199
1234
|
* - Chrome/Firefox/Edge: Uses Wav2Vec2Inference (384MB, WebGPU)
|
|
1200
1235
|
* - Fallback: Gracefully falls back to CPU model if GPU model fails to load
|
|
1201
1236
|
*
|
|
@@ -1230,10 +1265,15 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1230
1265
|
interface CreateLipSyncConfig {
|
|
1231
1266
|
/** URL for the GPU model (Wav2Vec2, used on Chrome/Firefox/Edge) */
|
|
1232
1267
|
gpuModelUrl: string;
|
|
1268
|
+
/**
|
|
1269
|
+
* URL for GPU model external data file (.onnx.data weights).
|
|
1270
|
+
* Default: `${gpuModelUrl}.data`
|
|
1271
|
+
*
|
|
1272
|
+
* Set to `false` to skip external data loading (single-file models only).
|
|
1273
|
+
*/
|
|
1274
|
+
gpuExternalDataUrl?: string | false;
|
|
1233
1275
|
/** URL for the CPU model (wav2arkit_cpu, used on Safari/iOS) */
|
|
1234
1276
|
cpuModelUrl: string;
|
|
1235
|
-
/** URL for the CPU model's external data file (.onnx.data weights) */
|
|
1236
|
-
cpuModelDataUrl?: string;
|
|
1237
1277
|
/**
|
|
1238
1278
|
* Model selection mode:
|
|
1239
1279
|
* - 'auto': Safari/iOS → CPU, everything else → GPU (default)
|
|
@@ -1388,6 +1428,7 @@ declare class SileroVADInference {
|
|
|
1388
1428
|
private inferenceQueue;
|
|
1389
1429
|
private preSpeechBuffer;
|
|
1390
1430
|
private wasSpeaking;
|
|
1431
|
+
private srTensor;
|
|
1391
1432
|
constructor(config: SileroVADConfig);
|
|
1392
1433
|
get backend(): RuntimeBackend | null;
|
|
1393
1434
|
get isLoaded(): boolean;
|
|
@@ -4086,4 +4127,4 @@ declare class EmphasisDetector {
|
|
|
4086
4127
|
reset(): void;
|
|
4087
4128
|
}
|
|
4088
4129
|
|
|
4089
|
-
export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type 
SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, remapWav2ArkitToLam, resolveBackend, scanForInvalidCaches, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes, validateCachedResponse };
|
|
4130
|
+
export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type 
SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, scanForInvalidCaches, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes, validateCachedResponse };
|
package/dist/index.d.ts
CHANGED
|
@@ -27,11 +27,19 @@ declare class MicrophoneCapture {
|
|
|
27
27
|
private buffer;
|
|
28
28
|
private _isRecording;
|
|
29
29
|
private _loggedFirstChunk;
|
|
30
|
+
/** Actual AudioContext sample rate (may differ from target on Firefox) */
|
|
31
|
+
private _nativeSampleRate;
|
|
30
32
|
constructor(events: EventEmitter<OmoteEvents>, config?: MicrophoneCaptureConfig);
|
|
31
33
|
get isRecording(): boolean;
|
|
32
34
|
get isSupported(): boolean;
|
|
33
35
|
start(): Promise<void>;
|
|
34
36
|
stop(): void;
|
|
37
|
+
/**
|
|
38
|
+
* Resample audio using linear interpolation.
|
|
39
|
+
* Used when the AudioContext runs at the device's native rate (e.g. 48kHz)
|
|
40
|
+
* and we need to downsample to the target rate (e.g. 16kHz).
|
|
41
|
+
*/
|
|
42
|
+
private resample;
|
|
35
43
|
private floatToPCM16;
|
|
36
44
|
}
|
|
37
45
|
|
|
@@ -98,12 +106,11 @@ interface AudioSchedulerOptions {
|
|
|
98
106
|
/** Number of audio channels (default: 1 for mono) */
|
|
99
107
|
channels?: number;
|
|
100
108
|
/**
|
|
101
|
-
*
|
|
102
|
-
* Gives
|
|
103
|
-
*
|
|
104
|
-
* Default: 0.05 (50ms — just enough to enqueue the first node)
|
|
109
|
+
* Initial lookahead delay in seconds before first audio plays.
|
|
110
|
+
* Gives LAM inference time to compute blendshapes before audio starts.
|
|
111
|
+
* Default: 0.05 (50ms) for WebGPU, increase to 0.3-0.5 for WASM on iOS.
|
|
105
112
|
*/
|
|
106
|
-
|
|
113
|
+
initialLookaheadSec?: number;
|
|
107
114
|
}
|
|
108
115
|
declare class AudioScheduler {
|
|
109
116
|
private readonly options;
|
|
@@ -373,13 +380,14 @@ declare function isSafari(): boolean;
|
|
|
373
380
|
/**
|
|
374
381
|
* Recommend using CPU-optimized lip sync model (wav2arkit_cpu)
|
|
375
382
|
*
|
|
376
|
-
* All
|
|
377
|
-
*
|
|
378
|
-
*
|
|
379
|
-
*
|
|
380
|
-
*
|
|
383
|
+
* All iOS browsers use WebKit and have tight memory limits — the 384MB
|
|
384
|
+
* LAM model causes silent crashes. wav2arkit_cpu uses URL pass-through
|
|
385
|
+
* (ORT fetches the 402MB weights directly into WASM, no JS heap copy).
|
|
386
|
+
*
|
|
387
|
+
* macOS Safari also needs this due to ONNX Runtime JSEP/ASYNCIFY bugs
|
|
388
|
+
* that crash WebKit's JIT compiler.
|
|
381
389
|
*
|
|
382
|
-
* @returns true if
|
|
390
|
+
* @returns true if iOS (any browser) or Safari (any platform)
|
|
383
391
|
*/
|
|
384
392
|
declare function shouldUseCpuLipSync(): boolean;
|
|
385
393
|
/**
|
|
@@ -400,7 +408,7 @@ declare function isSpeechRecognitionAvailable(): boolean;
|
|
|
400
408
|
* - Battery-efficient (no WASM overhead)
|
|
401
409
|
* - No model download needed (saves 30-150MB)
|
|
402
410
|
*
|
|
403
|
-
* @returns true if on iOS with Speech API available
|
|
411
|
+
* @returns true if on iOS or Safari with Speech API available
|
|
404
412
|
*/
|
|
405
413
|
declare function shouldUseNativeASR(): boolean;
|
|
406
414
|
/**
|
|
@@ -419,7 +427,7 @@ declare function shouldUseServerLipSync(): boolean;
|
|
|
419
427
|
/**
|
|
420
428
|
* Common interface for lip sync inference backends
|
|
421
429
|
*
|
|
422
|
-
* Both Wav2Vec2Inference (GPU, 384MB) and Wav2ArkitCpuInference (CPU,
|
|
430
|
+
* Both Wav2Vec2Inference (GPU, 384MB) and Wav2ArkitCpuInference (CPU, 404MB)
|
|
423
431
|
* implement this interface, allowing SyncedAudioPipeline and LAMPipeline to
|
|
424
432
|
* work with either model transparently.
|
|
425
433
|
*
|
|
@@ -454,19 +462,15 @@ interface LipSyncResult {
|
|
|
454
462
|
*
|
|
455
463
|
* Implemented by:
|
|
456
464
|
* - Wav2Vec2Inference (WebGPU/WASM, 384MB, ASR + lip sync)
|
|
457
|
-
* - Wav2ArkitCpuInference (WASM-only,
|
|
465
|
+
* - Wav2ArkitCpuInference (WASM-only, 404MB, lip sync only)
|
|
458
466
|
*/
|
|
459
467
|
interface LipSyncBackend {
|
|
468
|
+
/** Model identifier for backend-specific tuning (e.g. audio delay) */
|
|
469
|
+
readonly modelId: 'wav2vec2' | 'wav2arkit_cpu';
|
|
460
470
|
/** Current backend type (webgpu, wasm, or null if not loaded) */
|
|
461
471
|
readonly backend: RuntimeBackend | null;
|
|
462
472
|
/** Whether the model is loaded and ready for inference */
|
|
463
473
|
readonly isLoaded: boolean;
|
|
464
|
-
/**
|
|
465
|
-
* Preferred number of audio samples per inference chunk.
|
|
466
|
-
* Models with variable-length input can use smaller values for lower latency.
|
|
467
|
-
* Default (if undefined): 16000 (1.0s at 16kHz, required by Wav2Vec2).
|
|
468
|
-
*/
|
|
469
|
-
readonly chunkSamples?: number;
|
|
470
474
|
/**
|
|
471
475
|
* Load the ONNX model
|
|
472
476
|
* @returns Model loading information
|
|
@@ -529,7 +533,7 @@ interface LAMPipelineOptions {
|
|
|
529
533
|
}
|
|
530
534
|
declare class LAMPipeline {
|
|
531
535
|
private readonly options;
|
|
532
|
-
private readonly
|
|
536
|
+
private readonly REQUIRED_SAMPLES;
|
|
533
537
|
private readonly FRAME_RATE;
|
|
534
538
|
private buffer;
|
|
535
539
|
private bufferStartTime;
|
|
@@ -558,13 +562,15 @@ declare class LAMPipeline {
|
|
|
558
562
|
/**
|
|
559
563
|
* Get the frame that should be displayed at the current time
|
|
560
564
|
*
|
|
561
|
-
*
|
|
562
|
-
*
|
|
563
|
-
*
|
|
565
|
+
* Automatically removes frames that have already been displayed.
|
|
566
|
+
* This prevents memory leaks from accumulating old frames.
|
|
567
|
+
*
|
|
568
|
+
* Discard Window (prevents premature frame discarding):
|
|
569
|
+
* - WebGPU: 0.5s (LAM inference 20-100ms + RAF jitter + React stalls)
|
|
570
|
+
* - WASM: 1.0s (LAM inference 50-500ms + higher variability)
|
|
564
571
|
*
|
|
565
|
-
*
|
|
566
|
-
*
|
|
567
|
-
* to natural 30fps pacing via timestamp gating.
|
|
572
|
+
* Last-Frame-Hold: Returns last valid frame instead of null to prevent
|
|
573
|
+
* avatar freezing when between frames (RAF at 60fps vs LAM at 30fps).
|
|
568
574
|
*
|
|
569
575
|
* @param currentTime - Current AudioContext time
|
|
570
576
|
* @param lam - LAM inference engine (optional, for backend detection)
|
|
@@ -592,7 +598,7 @@ declare class LAMPipeline {
|
|
|
592
598
|
/**
|
|
593
599
|
* Flush remaining buffered audio
|
|
594
600
|
*
|
|
595
|
-
* Processes any remaining audio in the buffer, even if less than
|
|
601
|
+
* Processes any remaining audio in the buffer, even if less than REQUIRED_SAMPLES.
|
|
596
602
|
* This ensures the final audio chunk generates blendshape frames.
|
|
597
603
|
*
|
|
598
604
|
* Should be called when audio stream ends to prevent losing the last 0-1 seconds.
|
|
@@ -645,6 +651,12 @@ interface SyncedAudioPipelineOptions {
|
|
|
645
651
|
chunkTargetMs?: number;
|
|
646
652
|
/** LAM inference engine */
|
|
647
653
|
lam: LipSyncBackend;
|
|
654
|
+
/**
|
|
655
|
+
* Audio playback delay in ms before first audio plays.
|
|
656
|
+
* Gives LAM inference time to pre-compute blendshapes.
|
|
657
|
+
* Default: auto-detected from lam.backend (50ms WebGPU, 350ms WASM).
|
|
658
|
+
*/
|
|
659
|
+
audioDelayMs?: number;
|
|
648
660
|
}
|
|
649
661
|
interface SyncedAudioPipelineEvents {
|
|
650
662
|
/** New frame ready for display */
|
|
@@ -838,6 +850,17 @@ declare function getLoadedBackend(): RuntimeBackend | null;
|
|
|
838
850
|
* Check if ONNX Runtime has been loaded
|
|
839
851
|
*/
|
|
840
852
|
declare function isOnnxRuntimeLoaded(): boolean;
|
|
853
|
+
/**
|
|
854
|
+
* Preload ONNX Runtime and compile the WASM binary early
|
|
855
|
+
*
|
|
856
|
+
* Call this before loading heavy resources (Three.js, VRM models) to ensure
|
|
857
|
+
* WASM memory is allocated in a clean JS heap, reducing iOS memory pressure.
|
|
858
|
+
* Uses the singleton pattern — subsequent model loading reuses this instance.
|
|
859
|
+
*
|
|
860
|
+
* @param preference Backend preference (default: 'auto')
|
|
861
|
+
* @returns The resolved backend that was loaded
|
|
862
|
+
*/
|
|
863
|
+
declare function preloadOnnxRuntime(preference?: BackendPreference): Promise<RuntimeBackend>;
|
|
841
864
|
|
|
842
865
|
/**
|
|
843
866
|
* Whisper Automatic Speech Recognition using transformers.js
|
|
@@ -1036,6 +1059,13 @@ type InferenceBackend = BackendPreference;
|
|
|
1036
1059
|
interface Wav2Vec2InferenceConfig {
|
|
1037
1060
|
/** Path or URL to the ONNX model */
|
|
1038
1061
|
modelUrl: string;
|
|
1062
|
+
/**
|
|
1063
|
+
* Path or URL to external model data file (.onnx.data weights).
|
|
1064
|
+
* Default: `${modelUrl}.data` (e.g., /models/model.onnx.data)
|
|
1065
|
+
*
|
|
1066
|
+
* Set to `false` to skip external data loading (single-file models only).
|
|
1067
|
+
*/
|
|
1068
|
+
externalDataUrl?: string | false;
|
|
1039
1069
|
/** Preferred backend (auto will try WebGPU first, fallback to WASM) */
|
|
1040
1070
|
backend?: InferenceBackend;
|
|
1041
1071
|
/** Number of identity classes (default: 12 for streaming model) */
|
|
@@ -1066,7 +1096,8 @@ interface Wav2Vec2Result {
|
|
|
1066
1096
|
/** Inference time in ms */
|
|
1067
1097
|
inferenceTimeMs: number;
|
|
1068
1098
|
}
|
|
1069
|
-
declare class Wav2Vec2Inference {
|
|
1099
|
+
declare class Wav2Vec2Inference implements LipSyncBackend {
|
|
1100
|
+
readonly modelId: "wav2vec2";
|
|
1070
1101
|
private session;
|
|
1071
1102
|
private ort;
|
|
1072
1103
|
private config;
|
|
@@ -1116,12 +1147,16 @@ declare class Wav2Vec2Inference {
|
|
|
1116
1147
|
/**
|
|
1117
1148
|
* CPU-optimized lip sync inference using wav2arkit_cpu model
|
|
1118
1149
|
*
|
|
1119
|
-
* A
|
|
1120
|
-
* for
|
|
1150
|
+
* A Safari/iOS-compatible alternative to Wav2Vec2Inference (384MB) designed
|
|
1151
|
+
* for platforms where WebGPU crashes due to ONNX Runtime JSEP bugs.
|
|
1152
|
+
*
|
|
1153
|
+
* The model uses ONNX external data format:
|
|
1154
|
+
* - wav2arkit_cpu.onnx (1.86MB graph structure)
|
|
1155
|
+
* - wav2arkit_cpu.onnx.data (402MB weights)
|
|
1156
|
+
* Both files are fetched and cached automatically.
|
|
1121
1157
|
*
|
|
1122
1158
|
* Key differences from Wav2Vec2Inference:
|
|
1123
|
-
* - WASM-only backend (CPU-optimized, no WebGPU)
|
|
1124
|
-
* - 1.8MB model vs 384MB
|
|
1159
|
+
* - WASM-only backend (CPU-optimized, single-threaded, no WebGPU)
|
|
1125
1160
|
* - No identity input (baked to identity 11)
|
|
1126
1161
|
* - No ASR output (lip sync only)
|
|
1127
1162
|
* - Dynamic input length (not fixed to 16000 samples)
|
|
@@ -1146,12 +1181,18 @@ declare class Wav2Vec2Inference {
|
|
|
1146
1181
|
interface Wav2ArkitCpuConfig {
|
|
1147
1182
|
/** Path or URL to the wav2arkit_cpu ONNX model (.onnx graph file) */
|
|
1148
1183
|
modelUrl: string;
|
|
1149
|
-
/**
|
|
1150
|
-
|
|
1184
|
+
/**
|
|
1185
|
+
* Path or URL to external model data file (.onnx.data weights).
|
|
1186
|
+
* Default: `${modelUrl}.data` (e.g., /models/wav2arkit_cpu.onnx.data)
|
|
1187
|
+
*
|
|
1188
|
+
* Set to `false` to skip external data loading (single-file models only).
|
|
1189
|
+
*/
|
|
1190
|
+
externalDataUrl?: string | false;
|
|
1151
1191
|
/** Preferred backend (default: 'wasm' — this model is CPU-optimized) */
|
|
1152
1192
|
backend?: BackendPreference;
|
|
1153
1193
|
}
|
|
1154
1194
|
declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
1195
|
+
readonly modelId: "wav2arkit_cpu";
|
|
1155
1196
|
private session;
|
|
1156
1197
|
private ort;
|
|
1157
1198
|
private config;
|
|
@@ -1161,12 +1202,6 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1161
1202
|
constructor(config: Wav2ArkitCpuConfig);
|
|
1162
1203
|
get backend(): RuntimeBackend | null;
|
|
1163
1204
|
get isLoaded(): boolean;
|
|
1164
|
-
/**
|
|
1165
|
-
* Preferred chunk size: 4000 samples (250ms at 16kHz).
|
|
1166
|
-
* wav2arkit_cpu accepts variable-length input, so we use smaller chunks
|
|
1167
|
-
* for lower latency on WASM (vs 16000 for Wav2Vec2's fixed requirement).
|
|
1168
|
-
*/
|
|
1169
|
-
readonly chunkSamples = 4000;
|
|
1170
1205
|
/**
|
|
1171
1206
|
* Load the ONNX model
|
|
1172
1207
|
*/
|
|
@@ -1195,7 +1230,7 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1195
1230
|
* Factory function for lip sync with automatic GPU/CPU model selection
|
|
1196
1231
|
*
|
|
1197
1232
|
* Provides a unified API that automatically selects the optimal model:
|
|
1198
|
-
* - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (
|
|
1233
|
+
* - Safari (macOS + iOS): Uses Wav2ArkitCpuInference (404MB, WASM)
|
|
1199
1234
|
* - Chrome/Firefox/Edge: Uses Wav2Vec2Inference (384MB, WebGPU)
|
|
1200
1235
|
* - Fallback: Gracefully falls back to CPU model if GPU model fails to load
|
|
1201
1236
|
*
|
|
@@ -1230,10 +1265,15 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1230
1265
|
interface CreateLipSyncConfig {
|
|
1231
1266
|
/** URL for the GPU model (Wav2Vec2, used on Chrome/Firefox/Edge) */
|
|
1232
1267
|
gpuModelUrl: string;
|
|
1268
|
+
/**
|
|
1269
|
+
* URL for GPU model external data file (.onnx.data weights).
|
|
1270
|
+
* Default: `${gpuModelUrl}.data`
|
|
1271
|
+
*
|
|
1272
|
+
* Set to `false` to skip external data loading (single-file models only).
|
|
1273
|
+
*/
|
|
1274
|
+
gpuExternalDataUrl?: string | false;
|
|
1233
1275
|
/** URL for the CPU model (wav2arkit_cpu, used on Safari/iOS) */
|
|
1234
1276
|
cpuModelUrl: string;
|
|
1235
|
-
/** URL for the CPU model's external data file (.onnx.data weights) */
|
|
1236
|
-
cpuModelDataUrl?: string;
|
|
1237
1277
|
/**
|
|
1238
1278
|
* Model selection mode:
|
|
1239
1279
|
* - 'auto': Safari/iOS → CPU, everything else → GPU (default)
|
|
@@ -1388,6 +1428,7 @@ declare class SileroVADInference {
|
|
|
1388
1428
|
private inferenceQueue;
|
|
1389
1429
|
private preSpeechBuffer;
|
|
1390
1430
|
private wasSpeaking;
|
|
1431
|
+
private srTensor;
|
|
1391
1432
|
constructor(config: SileroVADConfig);
|
|
1392
1433
|
get backend(): RuntimeBackend | null;
|
|
1393
1434
|
get isLoaded(): boolean;
|
|
@@ -4086,4 +4127,4 @@ declare class EmphasisDetector {
|
|
|
4086
4127
|
reset(): void;
|
|
4087
4128
|
}
|
|
4088
4129
|
|
|
4089
|
-
export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type 
SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, remapWav2ArkitToLam, resolveBackend, scanForInvalidCaches, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes, validateCachedResponse };
|
|
4130
|
+
export { type AIAdapter, type AIAdapterEvents, type AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type 
SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, scanForInvalidCaches, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, symmetrizeBlendshapes, validateCachedResponse };
|