@omote/core 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +42 -33
- package/dist/index.d.ts +42 -33
- package/dist/index.js +156 -50
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +156 -50
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -2
package/dist/index.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { EventEmitter, OmoteEvents, AnimationEvent } from './events/index.mjs';
|
|
1
|
+
import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.mjs';
|
|
2
2
|
export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
|
|
3
3
|
import { InferenceSession, Tensor, Env } from 'onnxruntime-common';
|
|
4
4
|
export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.mjs';
|
|
@@ -1412,6 +1412,8 @@ declare class SenseVoiceInference {
|
|
|
1412
1412
|
private _backend;
|
|
1413
1413
|
private isLoading;
|
|
1414
1414
|
private inferenceQueue;
|
|
1415
|
+
private poisoned;
|
|
1416
|
+
private static readonly INFERENCE_TIMEOUT_MS;
|
|
1415
1417
|
private tokenMap;
|
|
1416
1418
|
private negMean;
|
|
1417
1419
|
private invStddev;
|
|
@@ -1755,6 +1757,8 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1755
1757
|
private _backend;
|
|
1756
1758
|
private isLoading;
|
|
1757
1759
|
private inferenceQueue;
|
|
1760
|
+
private poisoned;
|
|
1761
|
+
private static readonly INFERENCE_TIMEOUT_MS;
|
|
1758
1762
|
constructor(config: Wav2ArkitCpuConfig);
|
|
1759
1763
|
get backend(): RuntimeBackend | null;
|
|
1760
1764
|
get isLoaded(): boolean;
|
|
@@ -1946,7 +1950,7 @@ interface VADModelInfo {
|
|
|
1946
1950
|
/**
|
|
1947
1951
|
* Result from a single VAD inference
|
|
1948
1952
|
*/
|
|
1949
|
-
interface VADResult
|
|
1953
|
+
interface VADResult {
|
|
1950
1954
|
/** Speech probability (0-1) */
|
|
1951
1955
|
probability: number;
|
|
1952
1956
|
/** Whether speech is detected (probability > threshold) */
|
|
@@ -2027,7 +2031,7 @@ declare class SileroVADInference {
|
|
|
2027
2031
|
* @param audioChunk - Float32Array of exactly chunkSize samples (512 for 16kHz, 256 for 8kHz)
|
|
2028
2032
|
* @returns VAD result with speech probability
|
|
2029
2033
|
*/
|
|
2030
|
-
process(audioChunk: Float32Array): Promise<VADResult
|
|
2034
|
+
process(audioChunk: Float32Array): Promise<VADResult>;
|
|
2031
2035
|
/**
|
|
2032
2036
|
* Process audio and detect speech segments
|
|
2033
2037
|
*
|
|
@@ -2043,10 +2047,6 @@ declare class SileroVADInference {
|
|
|
2043
2047
|
/** Padding to add before/after speech in ms (default: 30) */
|
|
2044
2048
|
speechPadMs?: number;
|
|
2045
2049
|
}): Promise<SpeechSegment[]>;
|
|
2046
|
-
/**
|
|
2047
|
-
* Calculate RMS energy of audio chunk
|
|
2048
|
-
*/
|
|
2049
|
-
private calculateRMS;
|
|
2050
2050
|
/**
|
|
2051
2051
|
* Queue inference to serialize ONNX session calls
|
|
2052
2052
|
*/
|
|
@@ -2057,6 +2057,37 @@ declare class SileroVADInference {
|
|
|
2057
2057
|
dispose(): Promise<void>;
|
|
2058
2058
|
}
|
|
2059
2059
|
|
|
2060
|
+
/**
|
|
2061
|
+
* Silero VAD Web Worker implementation
|
|
2062
|
+
*
|
|
2063
|
+
* Runs Silero VAD inference in a dedicated Web Worker to prevent main thread blocking.
|
|
2064
|
+
* Uses inline worker script (Blob URL pattern) to avoid separate file deployment.
|
|
2065
|
+
*
|
|
2066
|
+
* Key design decisions:
|
|
2067
|
+
* - WASM backend only (WebGPU doesn't work in Workers)
|
|
2068
|
+
* - LSTM state serialized as Float32Array (Tensors can't cross worker boundary)
|
|
2069
|
+
* - Audio copied (not transferred) to retain main thread access for pre-speech buffer
|
|
2070
|
+
* - ONNX Runtime loaded from CDN in worker (no bundler complications)
|
|
2071
|
+
*
|
|
2072
|
+
* @category Inference
|
|
2073
|
+
*
|
|
2074
|
+
* @example Basic usage
|
|
2075
|
+
* ```typescript
|
|
2076
|
+
* import { SileroVADWorker } from '@omote/core';
|
|
2077
|
+
*
|
|
2078
|
+
* const vad = new SileroVADWorker({
|
|
2079
|
+
* modelUrl: '/models/silero-vad.onnx'
|
|
2080
|
+
* });
|
|
2081
|
+
* await vad.load();
|
|
2082
|
+
*
|
|
2083
|
+
* // Process 32ms chunks (512 samples at 16kHz)
|
|
2084
|
+
* const result = await vad.process(audioChunk);
|
|
2085
|
+
* if (result.isSpeech) {
|
|
2086
|
+
* console.log('Speech detected!', result.probability);
|
|
2087
|
+
* }
|
|
2088
|
+
* ```
|
|
2089
|
+
*/
|
|
2090
|
+
|
|
2060
2091
|
/**
|
|
2061
2092
|
* Configuration for Silero VAD Worker
|
|
2062
2093
|
*/
|
|
@@ -2091,25 +2122,7 @@ interface VADWorkerModelInfo {
|
|
|
2091
2122
|
sampleRate: number;
|
|
2092
2123
|
chunkSize: number;
|
|
2093
2124
|
}
|
|
2094
|
-
/**
|
2095
|
-
* Result from a single VAD inference
|
|
2096
|
-
*/
|
|
2097
|
-
interface VADResult {
|
|
2098
|
-
/** Speech probability (0-1) */
|
|
2099
|
-
probability: number;
|
|
2100
|
-
/** Whether speech is detected (probability > threshold) */
|
|
2101
|
-
isSpeech: boolean;
|
|
2102
|
-
/** Inference time in milliseconds */
|
|
2103
|
-
inferenceTimeMs: number;
|
|
2104
|
-
/**
|
|
2105
|
-
* Pre-speech audio chunks (only present on first speech detection).
|
|
2106
|
-
* These are the N chunks immediately before VAD triggered, useful for
|
|
2107
|
-
* capturing the beginning of speech that occurred before detection.
|
|
2108
|
-
*
|
|
2109
|
-
* Only populated when transitioning from silence to speech.
|
|
2110
|
-
*/
|
|
2111
|
-
preSpeechChunks?: Float32Array[];
|
|
2112
|
-
}
|
|
2125
|
+
|
|
2113
2126
|
/**
|
|
2114
2127
|
* Silero VAD Worker - Voice Activity Detection in a Web Worker
|
|
2115
2128
|
*
|
|
@@ -2257,7 +2270,7 @@ interface SileroVADBackend {
|
|
|
2257
2270
|
* @param audioChunk - Float32Array of exactly chunkSize samples
|
|
2258
2271
|
* @returns VAD result with speech probability
|
|
2259
2272
|
*/
|
|
2260
|
-
process(audioChunk: Float32Array): Promise<VADResult
|
|
2273
|
+
process(audioChunk: Float32Array): Promise<VADResult>;
|
|
2261
2274
|
/**
|
|
2262
2275
|
* Reset state for new audio stream
|
|
2263
2276
|
*/
|
|
@@ -2791,10 +2804,7 @@ interface ConversationMessage {
|
|
|
2791
2804
|
/** Audio duration if applicable (ms) */
|
|
2792
2805
|
audioDurationMs?: number;
|
|
2793
2806
|
}
|
|
2794
|
-
/**
|
2795
|
-
* Session state
|
|
2796
|
-
*/
|
|
2797
|
-
type AISessionState = 'idle' | 'listening' | 'thinking' | 'speaking' | 'interrupted' | 'error' | 'disconnected';
|
|
2807
|
+
|
|
2798
2808
|
/**
|
|
2799
2809
|
* Events emitted by AI adapters
|
|
2800
2810
|
*/
|
|
@@ -3106,7 +3116,6 @@ declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements
|
|
|
3106
3116
|
* Falls back to simple RMS if VAD not available
|
|
3107
3117
|
*/
|
|
3108
3118
|
private detectVoiceActivity;
|
|
3109
|
-
private int16ToFloat32;
|
|
3110
3119
|
private base64ToArrayBuffer;
|
|
3111
3120
|
private addToHistory;
|
|
3112
3121
|
private handleDisconnect;
|
|
@@ -4704,4 +4713,4 @@ declare class ProceduralLifeLayer {
|
|
|
4704
4713
|
private updateBrowNoise;
|
|
4705
4714
|
}
|
|
4706
4715
|
|
|
4707
|
-
export { type AIAdapter, type AIAdapterEvents,
|
|
4716
|
+
export { type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, 
symmetrizeBlendshapes };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { EventEmitter, OmoteEvents, AnimationEvent } from './events/index.js';
|
|
1
|
+
import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.js';
|
|
2
2
|
export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
|
|
3
3
|
import { InferenceSession, Tensor, Env } from 'onnxruntime-common';
|
|
4
4
|
export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.js';
|
|
@@ -1412,6 +1412,8 @@ declare class SenseVoiceInference {
|
|
|
1412
1412
|
private _backend;
|
|
1413
1413
|
private isLoading;
|
|
1414
1414
|
private inferenceQueue;
|
|
1415
|
+
private poisoned;
|
|
1416
|
+
private static readonly INFERENCE_TIMEOUT_MS;
|
|
1415
1417
|
private tokenMap;
|
|
1416
1418
|
private negMean;
|
|
1417
1419
|
private invStddev;
|
|
@@ -1755,6 +1757,8 @@ declare class Wav2ArkitCpuInference implements LipSyncBackend {
|
|
|
1755
1757
|
private _backend;
|
|
1756
1758
|
private isLoading;
|
|
1757
1759
|
private inferenceQueue;
|
|
1760
|
+
private poisoned;
|
|
1761
|
+
private static readonly INFERENCE_TIMEOUT_MS;
|
|
1758
1762
|
constructor(config: Wav2ArkitCpuConfig);
|
|
1759
1763
|
get backend(): RuntimeBackend | null;
|
|
1760
1764
|
get isLoaded(): boolean;
|
|
@@ -1946,7 +1950,7 @@ interface VADModelInfo {
|
|
|
1946
1950
|
/**
|
|
1947
1951
|
* Result from a single VAD inference
|
|
1948
1952
|
*/
|
|
1949
|
-
interface VADResult
|
|
1953
|
+
interface VADResult {
|
|
1950
1954
|
/** Speech probability (0-1) */
|
|
1951
1955
|
probability: number;
|
|
1952
1956
|
/** Whether speech is detected (probability > threshold) */
|
|
@@ -2027,7 +2031,7 @@ declare class SileroVADInference {
|
|
|
2027
2031
|
* @param audioChunk - Float32Array of exactly chunkSize samples (512 for 16kHz, 256 for 8kHz)
|
|
2028
2032
|
* @returns VAD result with speech probability
|
|
2029
2033
|
*/
|
|
2030
|
-
process(audioChunk: Float32Array): Promise<VADResult
|
|
2034
|
+
process(audioChunk: Float32Array): Promise<VADResult>;
|
|
2031
2035
|
/**
|
|
2032
2036
|
* Process audio and detect speech segments
|
|
2033
2037
|
*
|
|
@@ -2043,10 +2047,6 @@ declare class SileroVADInference {
|
|
|
2043
2047
|
/** Padding to add before/after speech in ms (default: 30) */
|
|
2044
2048
|
speechPadMs?: number;
|
|
2045
2049
|
}): Promise<SpeechSegment[]>;
|
|
2046
|
-
/**
|
|
2047
|
-
* Calculate RMS energy of audio chunk
|
|
2048
|
-
*/
|
|
2049
|
-
private calculateRMS;
|
|
2050
2050
|
/**
|
|
2051
2051
|
* Queue inference to serialize ONNX session calls
|
|
2052
2052
|
*/
|
|
@@ -2057,6 +2057,37 @@ declare class SileroVADInference {
|
|
|
2057
2057
|
dispose(): Promise<void>;
|
|
2058
2058
|
}
|
|
2059
2059
|
|
|
2060
|
+
/**
|
|
2061
|
+
* Silero VAD Web Worker implementation
|
|
2062
|
+
*
|
|
2063
|
+
* Runs Silero VAD inference in a dedicated Web Worker to prevent main thread blocking.
|
|
2064
|
+
* Uses inline worker script (Blob URL pattern) to avoid separate file deployment.
|
|
2065
|
+
*
|
|
2066
|
+
* Key design decisions:
|
|
2067
|
+
* - WASM backend only (WebGPU doesn't work in Workers)
|
|
2068
|
+
* - LSTM state serialized as Float32Array (Tensors can't cross worker boundary)
|
|
2069
|
+
* - Audio copied (not transferred) to retain main thread access for pre-speech buffer
|
|
2070
|
+
* - ONNX Runtime loaded from CDN in worker (no bundler complications)
|
|
2071
|
+
*
|
|
2072
|
+
* @category Inference
|
|
2073
|
+
*
|
|
2074
|
+
* @example Basic usage
|
|
2075
|
+
* ```typescript
|
|
2076
|
+
* import { SileroVADWorker } from '@omote/core';
|
|
2077
|
+
*
|
|
2078
|
+
* const vad = new SileroVADWorker({
|
|
2079
|
+
* modelUrl: '/models/silero-vad.onnx'
|
|
2080
|
+
* });
|
|
2081
|
+
* await vad.load();
|
|
2082
|
+
*
|
|
2083
|
+
* // Process 32ms chunks (512 samples at 16kHz)
|
|
2084
|
+
* const result = await vad.process(audioChunk);
|
|
2085
|
+
* if (result.isSpeech) {
|
|
2086
|
+
* console.log('Speech detected!', result.probability);
|
|
2087
|
+
* }
|
|
2088
|
+
* ```
|
|
2089
|
+
*/
|
|
2090
|
+
|
|
2060
2091
|
/**
|
|
2061
2092
|
* Configuration for Silero VAD Worker
|
|
2062
2093
|
*/
|
|
@@ -2091,25 +2122,7 @@ interface VADWorkerModelInfo {
|
|
|
2091
2122
|
sampleRate: number;
|
|
2092
2123
|
chunkSize: number;
|
|
2093
2124
|
}
|
|
2094
|
-
/**
|
2095
|
-
* Result from a single VAD inference
|
|
2096
|
-
*/
|
|
2097
|
-
interface VADResult {
|
|
2098
|
-
/** Speech probability (0-1) */
|
|
2099
|
-
probability: number;
|
|
2100
|
-
/** Whether speech is detected (probability > threshold) */
|
|
2101
|
-
isSpeech: boolean;
|
|
2102
|
-
/** Inference time in milliseconds */
|
|
2103
|
-
inferenceTimeMs: number;
|
|
2104
|
-
/**
|
|
2105
|
-
* Pre-speech audio chunks (only present on first speech detection).
|
|
2106
|
-
* These are the N chunks immediately before VAD triggered, useful for
|
|
2107
|
-
* capturing the beginning of speech that occurred before detection.
|
|
2108
|
-
*
|
|
2109
|
-
* Only populated when transitioning from silence to speech.
|
|
2110
|
-
*/
|
|
2111
|
-
preSpeechChunks?: Float32Array[];
|
|
2112
|
-
}
|
|
2125
|
+
|
|
2113
2126
|
/**
|
|
2114
2127
|
* Silero VAD Worker - Voice Activity Detection in a Web Worker
|
|
2115
2128
|
*
|
|
@@ -2257,7 +2270,7 @@ interface SileroVADBackend {
|
|
|
2257
2270
|
* @param audioChunk - Float32Array of exactly chunkSize samples
|
|
2258
2271
|
* @returns VAD result with speech probability
|
|
2259
2272
|
*/
|
|
2260
|
-
process(audioChunk: Float32Array): Promise<VADResult
|
|
2273
|
+
process(audioChunk: Float32Array): Promise<VADResult>;
|
|
2261
2274
|
/**
|
|
2262
2275
|
* Reset state for new audio stream
|
|
2263
2276
|
*/
|
|
@@ -2791,10 +2804,7 @@ interface ConversationMessage {
|
|
|
2791
2804
|
/** Audio duration if applicable (ms) */
|
|
2792
2805
|
audioDurationMs?: number;
|
|
2793
2806
|
}
|
|
2794
|
-
/**
|
2795
|
-
* Session state
|
|
2796
|
-
*/
|
|
2797
|
-
type AISessionState = 'idle' | 'listening' | 'thinking' | 'speaking' | 'interrupted' | 'error' | 'disconnected';
|
|
2807
|
+
|
|
2798
2808
|
/**
|
|
2799
2809
|
* Events emitted by AI adapters
|
|
2800
2810
|
*/
|
|
@@ -3106,7 +3116,6 @@ declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements
|
|
|
3106
3116
|
* Falls back to simple RMS if VAD not available
|
|
3107
3117
|
*/
|
|
3108
3118
|
private detectVoiceActivity;
|
|
3109
|
-
private int16ToFloat32;
|
|
3110
3119
|
private base64ToArrayBuffer;
|
|
3111
3120
|
private addToHistory;
|
|
3112
3121
|
private handleDisconnect;
|
|
@@ -4704,4 +4713,4 @@ declare class ProceduralLifeLayer {
|
|
|
4704
4713
|
private updateBrowNoise;
|
|
4705
4714
|
}
|
|
4706
4715
|
|
|
4707
|
-
export { type AIAdapter, type AIAdapterEvents,
|
|
4716
|
+
export { type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, type CTCDecodeResult, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateLipSyncConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_ARKIT_MAP, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type Emotion2VecLabel, type EmotionAnimationMap, type EmotionBlendMode, type EmotionBlendshapeConfig, EmotionController, type EmotionFrame, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionToBlendshapeMapper, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type KaldiFbankOptions, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LipSyncBackend, type LipSyncModelInfo, type LipSyncResult, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UPPER_FACE_BLENDSHAPES, type UpperFaceBlendshapeName, type UpperFaceBlendshapes, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, WAV2ARKIT_BLENDSHAPES, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, applyCMVN, applyLFR, blendEmotions, calculatePeak, calculateRMS, computeKaldiFbank, configureCacheLimit, configureTelemetry, createEmotionVector, createLipSync, createSessionWithFallback, createSileroVAD, ctcGreedyDecode, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, parseCMVNFromMetadata, parseTokensFile, preloadModels, preloadOnnxRuntime, remapWav2ArkitToLam, resolveBackend, resolveLanguageId, resolveTextNormId, shouldEnableWasmProxy, shouldUseCpuLipSync, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, 
symmetrizeBlendshapes };
|