npm - @omote/core - Versions diffs - 0.9.1 → 0.9.2 - Mend

@omote/core 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/dist/ErrorCodes-AX3ADZri.d.mts +266 -0
package/dist/ErrorCodes-AX3ADZri.d.ts +266 -0
package/dist/chunk-CYBTTLG7.mjs +927 -0
package/dist/chunk-CYBTTLG7.mjs.map +1 -0
package/dist/chunk-X5OTUOE6.mjs +927 -0
package/dist/chunk-X5OTUOE6.mjs.map +1 -0
package/dist/chunk-Y3DTP5P3.mjs +927 -0
package/dist/chunk-Y3DTP5P3.mjs.map +1 -0
package/dist/index.d.mts +214 -3
package/dist/index.d.ts +214 -3
package/dist/index.js +711 -231
package/dist/index.js.map +1 -1
package/dist/index.mjs +636 -223
package/dist/index.mjs.map +1 -1
package/dist/logging/index.d.mts +2 -2
package/dist/logging/index.d.ts +2 -2
package/dist/logging/index.js +75 -1
package/dist/logging/index.js.map +1 -1
package/dist/logging/index.mjs +9 -1
package/package.json +3 -1

package/dist/index.d.ts CHANGED

@@ -1,6 +1,6 @@
 import { EventEmitter, OmoteEvents } from './events/index.js';
 export { AnimationEvent, BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
-export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, a as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, c as LogLevel, d as LogSink, e as LoggingConfig, g as configureLogging, h as createLogger, i as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, k as setLoggingEnabled } from './Logger-BeUI6jG7.js';
+export { C as Clock, D as DEFAULT_LOGGING_CONFIG, E as ErrorCode, a as ErrorCodes, I as ILogger, b as LOG_LEVEL_PRIORITY, c as LogEntry, L as LogFormatter, d as LogLevel, e as LogSink, f as LoggingConfig, h as configureClock, i as configureLogging, j as createLogger, l as getClock, m as getLoggingConfig, o as noopLogger, r as resetLoggingConfig, s as setLogLevel, p as setLoggingEnabled } from './ErrorCodes-AX3ADZri.js';
 export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
 /**
@@ -163,6 +163,8 @@ interface AudioSchedulerOptions {
      * Default: 0.05 (50ms) for WebGPU, increase to 0.3-0.5 for WASM on iOS.
      */
     initialLookaheadSec?: number;
+    /** Error callback for critical scheduling issues */
+    onError?: (error: Error) => void;
 }
 declare class AudioScheduler {
     private readonly options;
@@ -964,6 +966,8 @@ declare class SenseVoiceInference {
     private inferenceQueue;
     private poisoned;
     private static readonly INFERENCE_TIMEOUT_MS;
+    private lastLfrFrames;
+    private webgpuShapeWarned;
     private tokenMap;
     private negMean;
     private invStddev;
@@ -1525,7 +1529,10 @@ interface FrameSource {
         blendshapes: Float32Array;
         emotion?: string;
     }) => void): void;
-    off?(event: 'frame', callback: (...args: any[]) => void): void;
+    off?(event: 'frame', callback: (frame: {
+        blendshapes: Float32Array;
+        emotion?: string;
+    }) => void): void;
 }
 type VoicePipelineState = 'idle' | 'loading' | 'ready' | 'listening' | 'thinking' | 'speaking' | 'error';
 interface LoadingProgress {
@@ -2036,6 +2043,7 @@ declare class SpeechListener extends EventEmitter<SpeechListenerEvents> {
     private lastProgressiveResult;
     private lastProgressiveSamples;
     private asrErrorCount;
+    private progressiveErrorCount;
     /** Current listener state */
     get state(): SpeechListenerState;
     constructor(config?: SpeechListenerConfig);
@@ -3318,6 +3326,10 @@ declare const KOKORO_VOICES: {
     readonly bm_fable: "bm_fable";
     readonly bm_george: "bm_george";
     readonly bm_lewis: "bm_lewis";
+    readonly ef_dora: "ef_dora";
+    readonly em_alex: "em_alex";
+    readonly em_santa: "em_santa";
+    readonly ff_siwis: "ff_siwis";
 };
 type KokoroVoiceName = keyof typeof KOKORO_VOICES;
 /**
@@ -3325,6 +3337,198 @@ type KokoroVoiceName = keyof typeof KOKORO_VOICES;
  */
 declare function listVoices(): string[];
+/**
+ * ElevenLabs TTS Backend — Cloud text-to-speech via ElevenLabs REST API.
+ *
+ * Implements the TTSBackend interface so it can be used anywhere Kokoro TTS is used
+ * (TTSPlayback, TTSSpeaker, VoicePipeline, PlaybackPipeline, etc.)
+ *
+ * Zero external dependencies — uses fetch() directly.
+ *
+ * @category Inference
+ *
+ * @example Basic usage
+ * ```typescript
+ * import { ElevenLabsTTSBackend } from '@omote/core';
+ *
+ * const tts = new ElevenLabsTTSBackend({
+ *   apiKey: 'your-api-key',
+ *   voiceId: 'voice-id',
+ * });
+ * await tts.load();
+ *
+ * for await (const chunk of tts.stream("Hello world!")) {
+ *   playbackPipeline.feedBuffer(chunk.audio);
+ * }
+ * ```
+ *
+ * @example With TTSSpeaker
+ * ```typescript
+ * const speaker = new TTSSpeaker();
+ * await speaker.connect(tts, { lam: createA2E() });
+ * await speaker.speak("Hello!");
+ * ```
+ */
+interface ElevenLabsConfig {
+    /** ElevenLabs API key (required) */
+    apiKey: string;
+    /** ID of the ElevenLabs voice to use (required) */
+    voiceId: string;
+    /** Model ID (default: 'eleven_multilingual_v2') */
+    model?: string;
+    /**
+     * Output format (default: 'pcm_16000').
+     * Use 'pcm_16000' for lip sync compatibility (16 kHz matches the A2E input rate).
+     * Other options: 'pcm_22050', 'pcm_24000', 'pcm_44100'
+     */
+    outputFormat?: string;
+    /** Voice stability, in the range 0-1 (default: 0.5) */
+    stability?: number;
+    /** Voice similarity boost, in the range 0-1 (default: 0.75) */
+    similarityBoost?: number;
+    /** API base URL override (default: 'https://api.elevenlabs.io') */
+    baseUrl?: string;
+}
+declare class ElevenLabsTTSBackend implements TTSBackend {
+    private readonly apiKey;
+    private readonly voiceId;
+    private readonly model;
+    private readonly outputFormat;
+    private readonly stability;
+    private readonly similarityBoost;
+    private readonly baseUrl;
+    private readonly _sampleRate;
+    private _isLoaded;
+    constructor(config: ElevenLabsConfig);
+    get sampleRate(): number;
+    get isLoaded(): boolean;
+    /**
+     * No-op for cloud TTS: there is no model to load.
+     * Marks the backend as ready.
+     */
+    load(): Promise<void>;
+    /**
+     * Stream synthesized audio from ElevenLabs for the given text.
+     *
+     * Uses the streaming endpoint. Yields TTSChunk values: a single chunk for a
+     * non-streaming response, or multiple chunks as response data arrives.
+     */
+    stream(text: string, options?: TTSStreamOptions): AsyncGenerator<TTSChunk>;
+    dispose(): Promise<void>;
+    private getHttpErrorMessage;
+}
+/**
+ * AWS Polly TTS Backend — Cloud text-to-speech via consumer-provided AWS SDK call.
+ *
+ * Implements the TTSBackend interface. Keeps @omote/core free of AWS SDK dependencies
+ * by delegating the actual Polly API call to a consumer-provided function.
+ *
+ * @category Inference
+ *
+ * @example Basic usage with AWS SDK v3
+ * ```typescript
+ * import { PollyTTSBackend } from '@omote/core';
+ * import { PollyClient, SynthesizeSpeechCommand } from '@aws-sdk/client-polly';
+ *
+ * const polly = new PollyClient({ region: 'us-east-1' });
+ *
+ * const tts = new PollyTTSBackend({
+ *   synthesizeFn: async (text, voice, sampleRate) => {
+ *     const cmd = new SynthesizeSpeechCommand({
+ *       Text: text,
+ *       VoiceId: voice,
+ *       Engine: 'neural',
+ *       OutputFormat: 'pcm',
+ *       SampleRate: String(sampleRate),
+ *     });
+ *     const result = await polly.send(cmd);
+ *     const stream = result.AudioStream;
+ *     // Convert stream to ArrayBuffer (Node or browser)
+ *     const chunks: Uint8Array[] = [];
+ *     for await (const chunk of stream as AsyncIterable<Uint8Array>) {
+ *       chunks.push(chunk);
+ *     }
+ *     const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
+ *     const merged = new Uint8Array(totalLength);
+ *     let offset = 0;
+ *     for (const chunk of chunks) {
+ *       merged.set(chunk, offset);
+ *       offset += chunk.length;
+ *     }
+ *     return {
+ *       audio: merged.buffer,
+ *       contentType: result.ContentType ?? 'audio/pcm',
+ *     };
+ *   },
+ * });
+ *
+ * await tts.load();
+ * for await (const chunk of tts.stream("Hello world!")) {
+ *   playbackPipeline.feedBuffer(chunk.audio);
+ * }
+ * ```
+ */
+/**
+ * Result returned by the consumer-provided `synthesizeFn` of PollyConfig.
+ */
+interface PollySynthesizeResult {
+    /** Raw PCM audio bytes (signed 16-bit little-endian samples) */
+    audio: ArrayBuffer;
+    /** Content type reported by the Polly response (e.g., 'audio/pcm') */
+    contentType: string;
+}
+/**
+ * Configuration for PollyTTSBackend.
+ *
+ * The `synthesizeFn` callback lets consumers use their own AWS SDK setup
+ * (credentials, region, SDK version), so @omote/core does not depend on `@aws-sdk/client-polly`.
+ */
+interface PollyConfig {
+    /**
+     * Consumer-provided function that calls AWS Polly.
+     * Must return PCM audio (Int16 LE) at the requested sample rate.
+     *
+     * @param text - Text to synthesize
+     * @param voice - Polly voice ID (e.g., 'Joanna')
+     * @param sampleRate - Requested output sample rate in Hz (e.g., 16000)
+     * @returns Promise of the PCM audio buffer and its content type
+     */
+    synthesizeFn: (text: string, voice: string, sampleRate: number) => Promise<PollySynthesizeResult>;
+    /** Polly voice ID (default: 'Joanna') */
+    voice?: string;
+    /** Output sample rate in Hz (default: 16000). AWS documents 8000 and 16000 as the valid rates for Polly PCM output. */
+    sampleRate?: number;
+    /** Polly engine type (default: 'neural'). NOTE(review): `synthesizeFn` is not passed this value — confirm where it is applied. */
+    engine?: 'neural' | 'standard' | 'generative' | 'long-form';
+}
+declare class PollyTTSBackend implements TTSBackend {
+    private readonly synthesizeFn;
+    private readonly voice;
+    private readonly _sampleRate;
+    private readonly engine;
+    private _isLoaded;
+    constructor(config: PollyConfig);
+    get sampleRate(): number;
+    get isLoaded(): boolean;
+    /**
+     * No-op for cloud TTS: there is no model to load.
+     * Marks the backend as ready.
+     */
+    load(): Promise<void>;
+    /**
+     * Synthesize audio via the consumer's Polly function.
+     *
+     * Polly's SynthesizeSpeech is request/response (not streaming for PCM),
+     * so this yields a single TTSChunk per call. For long text, consider splitting
+     * it into sentences on the consumer side.
+     */
+    stream(text: string, options?: TTSStreamOptions): AsyncGenerator<TTSChunk>;
+    dispose(): Promise<void>;
+}
 /**
  * ORT CDN configuration
  *
@@ -3889,6 +4093,12 @@ declare const MetricNames: {
     readonly CACHE_HITS: "omote.cache.hits";
     /** Counter: Cache misses */
     readonly CACHE_MISSES: "omote.cache.misses";
+    /** Counter: Cache stale (version/etag mismatch) */
+    readonly CACHE_STALE: "omote.cache.stale";
+    /** Counter: Cache quota warning (>90% used) */
+    readonly CACHE_QUOTA_WARNING: "omote.cache.quota_warning";
+    /** Counter: Cache eviction (LRU) */
+    readonly CACHE_EVICTION: "omote.cache.eviction";
     /** Histogram: VoicePipeline turn latency (speech end → transcript ready, excludes playback) */
     readonly VOICE_TURN_LATENCY: "omote.voice.turn.latency";
     /** Histogram: ASR transcription latency in ms */
@@ -5517,6 +5727,7 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
     private lastProgressiveResult;
     private lastProgressiveSamples;
     private asrErrorCount;
+    private progressiveErrorCount;
     private responseAbortController;
     private _unsubChunk;
     private _unsubLevel;
@@ -5645,4 +5856,4 @@ declare class VoiceOrchestrator extends EventEmitter<VoiceOrchestratorEvents> {
     private setState;
 }
-export { type A2EBackend, A2EInference, type A2EInferenceConfig, type A2EModelInfo, A2EProcessor, type A2EProcessorConfig, type A2EResult, A2EUnifiedAdapter, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, type AnimationController, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationSource, type AnimationSourceOptions, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, type BoneFilterConfig, type CacheConfig, type CacheSpanAttributes, CharacterController, type CharacterControllerConfig, type CharacterProfile, type CharacterUpdateInput, type CharacterUpdateOutput, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateKokoroTTSConfig, type CreateSenseVoiceConfig, type CreateTTSPlayerConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_BONE_FILTER, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, EXPLICIT_EMOTION_COUNT, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, type ErrorType, ErrorTypes, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FrameSource, type FullFaceFrame, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceFactoryConfig, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, KOKORO_VOICES, type KokoroStreamChunk, type KokoroTTSConfig, KokoroTTSInference, type KokoroTTSModelInfo, type KokoroTTSResult, KokoroTTSUnifiedAdapter, 
KokoroTTSWorker, type KokoroVoiceName, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MIXAMO_PREFIX, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PRESERVE_POSITION_BONES, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type Quat, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, SpeechListener, type SpeechListenerConfig, type SpeechListenerEvents, type SpeechListenerState, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type SynthesizeOptions, type TTSBackend, type TTSChunk, TTSPlayback, type TTSPlaybackConfig, type TTSPlaybackEvents, TTSPlayer, TTSSpeaker, type TTSSpeakerConfig, type TTSStreamOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TrackDescriptor, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Vec3, VoiceOrchestrator, type VoiceOrchestratorCloudConfig, type VoiceOrchestratorConfig, type VoiceOrchestratorEvents, type 
VoiceOrchestratorLocalConfig, VoicePipeline, type VoicePipelineCloudConfig, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineLocalConfig, type VoicePipelineState, A2EInference as Wav2Vec2Inference, type WorkerHealthState, analyzeTextEmotion, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureOrtCdn, configureTelemetry, createA2E, createEmotionVector, createKokoroTTS, createSenseVoice, createSileroVAD, createTTSPlayer, fetchWithCache, float32ToPcm16, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getOrtCdnBase, getRecommendedBackend, getTelemetry, hasWebGPUApi, int16ToFloat32, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, listVoices as listKokoroVoices, parseEmotionTags, pcm16ToFloat32, preloadModels, resampleLinear, resetModelUrls, resolveBackend, resolveEmotion, shouldEnableWasmProxy, shouldKeepTrack, shouldUseNativeASR, shouldUseServerA2E, stripMixamoPrefix, supportsVADWorker, ttsToPlaybackFormat, validateTTSInput };
+export { type A2EBackend, A2EInference, type A2EInferenceConfig, type A2EModelInfo, A2EProcessor, type A2EProcessorConfig, type A2EResult, A2EUnifiedAdapter, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, type AnimationController, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationSource, type AnimationSourceOptions, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, type BoneFilterConfig, type CacheConfig, type CacheSpanAttributes, CharacterController, type CharacterControllerConfig, type CharacterProfile, type CharacterUpdateInput, type CharacterUpdateOutput, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateKokoroTTSConfig, type CreateSenseVoiceConfig, type CreateTTSPlayerConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_BONE_FILTER, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, EXPLICIT_EMOTION_COUNT, type ElevenLabsConfig, ElevenLabsTTSBackend, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, type ErrorType, ErrorTypes, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FrameSource, type FullFaceFrame, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceFactoryConfig, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, KOKORO_VOICES, type KokoroStreamChunk, type KokoroTTSConfig, KokoroTTSInference, type KokoroTTSModelInfo, type 
KokoroTTSResult, KokoroTTSUnifiedAdapter, KokoroTTSWorker, type KokoroVoiceName, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MIXAMO_PREFIX, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PRESERVE_POSITION_BONES, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, type PollyConfig, type PollySynthesizeResult, PollyTTSBackend, ProceduralLifeLayer, type Quat, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, SpeechListener, type SpeechListenerConfig, type SpeechListenerEvents, type SpeechListenerState, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type SynthesizeOptions, type TTSBackend, type TTSChunk, TTSPlayback, type TTSPlaybackConfig, type TTSPlaybackEvents, TTSPlayer, TTSSpeaker, type TTSSpeakerConfig, type TTSStreamOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TrackDescriptor, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Vec3, VoiceOrchestrator, 
type VoiceOrchestratorCloudConfig, type VoiceOrchestratorConfig, type VoiceOrchestratorEvents, type VoiceOrchestratorLocalConfig, VoicePipeline, type VoicePipelineCloudConfig, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineLocalConfig, type VoicePipelineState, A2EInference as Wav2Vec2Inference, type WorkerHealthState, analyzeTextEmotion, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureOrtCdn, configureTelemetry, createA2E, createEmotionVector, createKokoroTTS, createSenseVoice, createSileroVAD, createTTSPlayer, fetchWithCache, float32ToPcm16, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getOrtCdnBase, getRecommendedBackend, getTelemetry, hasWebGPUApi, int16ToFloat32, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, listVoices as listKokoroVoices, parseEmotionTags, pcm16ToFloat32, preloadModels, resampleLinear, resetModelUrls, resolveBackend, resolveEmotion, shouldEnableWasmProxy, shouldKeepTrack, shouldUseNativeASR, shouldUseServerA2E, stripMixamoPrefix, supportsVADWorker, ttsToPlaybackFormat, validateTTSInput };