@omote/core 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ErrorCodes-AX3ADZri.d.mts +266 -0
- package/dist/ErrorCodes-AX3ADZri.d.ts +266 -0
- package/dist/chunk-CYBTTLG7.mjs +927 -0
- package/dist/chunk-CYBTTLG7.mjs.map +1 -0
- package/dist/chunk-X5OTUOE6.mjs +927 -0
- package/dist/chunk-X5OTUOE6.mjs.map +1 -0
- package/dist/chunk-Y3DTP5P3.mjs +927 -0
- package/dist/chunk-Y3DTP5P3.mjs.map +1 -0
- package/dist/index.d.mts +214 -3
- package/dist/index.d.ts +214 -3
- package/dist/index.js +711 -231
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +636 -223
- package/dist/index.mjs.map +1 -1
- package/dist/logging/index.d.mts +2 -2
- package/dist/logging/index.d.ts +2 -2
- package/dist/logging/index.js +75 -1
- package/dist/logging/index.js.map +1 -1
- package/dist/logging/index.mjs +9 -1
- package/package.json +3 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { EventEmitter, OmoteEvents } from './events/index.js';
|
|
2
2
|
export { AnimationEvent, BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
|
|
3
|
-
export { D as DEFAULT_LOGGING_CONFIG, I as ILogger,
|
|
3
|
+
export { C as Clock, D as DEFAULT_LOGGING_CONFIG, E as ErrorCode, a as ErrorCodes, I as ILogger, b as LOG_LEVEL_PRIORITY, c as LogEntry, L as LogFormatter, d as LogLevel, e as LogSink, f as LoggingConfig, h as configureClock, i as configureLogging, j as createLogger, l as getClock, m as getLoggingConfig, o as noopLogger, r as resetLoggingConfig, s as setLogLevel, p as setLoggingEnabled } from './ErrorCodes-AX3ADZri.js';
|
|
4
4
|
export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
|
|
5
5
|
|
|
6
6
|
/**
|
|
@@ -163,6 +163,8 @@ interface AudioSchedulerOptions {
|
|
|
163
163
|
* Default: 0.05 (50ms) for WebGPU, increase to 0.3-0.5 for WASM on iOS.
|
|
164
164
|
*/
|
|
165
165
|
initialLookaheadSec?: number;
|
|
166
|
+
/** Error callback for critical scheduling issues */
|
|
167
|
+
onError?: (error: Error) => void;
|
|
166
168
|
}
|
|
167
169
|
declare class AudioScheduler {
|
|
168
170
|
private readonly options;
|
|
@@ -964,6 +966,8 @@ declare class SenseVoiceInference {
|
|
|
964
966
|
private inferenceQueue;
|
|
965
967
|
private poisoned;
|
|
966
968
|
private static readonly INFERENCE_TIMEOUT_MS;
|
|
969
|
+
private lastLfrFrames;
|
|
970
|
+
private webgpuShapeWarned;
|
|
967
971
|
private tokenMap;
|
|
968
972
|
private negMean;
|
|
969
973
|
private invStddev;
|
|
@@ -1525,7 +1529,10 @@ interface FrameSource {
|
|
|
1525
1529
|
blendshapes: Float32Array;
|
|
1526
1530
|
emotion?: string;
|
|
1527
1531
|
}) => void): void;
|
|
1528
|
-
off?(event: 'frame', callback: (
|
|
1532
|
+
off?(event: 'frame', callback: (frame: {
|
|
1533
|
+
blendshapes: Float32Array;
|
|
1534
|
+
emotion?: string;
|
|
1535
|
+
}) => void): void;
|
|
1529
1536
|
}
|
|
1530
1537
|
type VoicePipelineState = 'idle' | 'loading' | 'ready' | 'listening' | 'thinking' | 'speaking' | 'error';
|
|
1531
1538
|
interface LoadingProgress {
|
|
@@ -2036,6 +2043,7 @@ declare class SpeechListener extends EventEmitter<SpeechListenerEvents> {
|
|
|
2036
2043
|
private lastProgressiveResult;
|
|
2037
2044
|
private lastProgressiveSamples;
|
|
2038
2045
|
private asrErrorCount;
|
|
2046
|
+
private progressiveErrorCount;
|
|
2039
2047
|
/** Current listener state */
|
|
2040
2048
|
get state(): SpeechListenerState;
|
|
2041
2049
|
constructor(config?: SpeechListenerConfig);
|
|
@@ -3318,6 +3326,10 @@ declare const KOKORO_VOICES: {
|
|
|
3318
3326
|
readonly bm_fable: "bm_fable";
|
|
3319
3327
|
readonly bm_george: "bm_george";
|
|
3320
3328
|
readonly bm_lewis: "bm_lewis";
|
|
3329
|
+
readonly ef_dora: "ef_dora";
|
|
3330
|
+
readonly em_alex: "em_alex";
|
|
3331
|
+
readonly em_santa: "em_santa";
|
|
3332
|
+
readonly ff_siwis: "ff_siwis";
|
|
3321
3333
|
};
|
|
3322
3334
|
type KokoroVoiceName = keyof typeof KOKORO_VOICES;
|
|
3323
3335
|
/**
|
|
@@ -3325,6 +3337,198 @@ type KokoroVoiceName = keyof typeof KOKORO_VOICES;
|
|
|
3325
3337
|
*/
|
|
3326
3338
|
declare function listVoices(): string[];
|
|
3327
3339
|
|
|
3340
|
+
/**
|
|
3341
|
+
* ElevenLabs TTS Backend — Cloud text-to-speech via ElevenLabs REST API.
|
|
3342
|
+
*
|
|
3343
|
+
* Implements the TTSBackend interface so it can be used anywhere Kokoro TTS is used
|
|
3344
|
+
* (TTSPlayback, TTSSpeaker, VoicePipeline, PlaybackPipeline, etc.)
|
|
3345
|
+
*
|
|
3346
|
+
* Zero external dependencies — uses fetch() directly.
|
|
3347
|
+
*
|
|
3348
|
+
* @category Inference
|
|
3349
|
+
*
|
|
3350
|
+
* @example Basic usage
|
|
3351
|
+
* ```typescript
|
|
3352
|
+
* import { ElevenLabsTTSBackend } from '@omote/core';
|
|
3353
|
+
*
|
|
3354
|
+
* const tts = new ElevenLabsTTSBackend({
|
|
3355
|
+
* apiKey: 'your-api-key',
|
|
3356
|
+
* voiceId: 'voice-id',
|
|
3357
|
+
* });
|
|
3358
|
+
* await tts.load();
|
|
3359
|
+
*
|
|
3360
|
+
* for await (const chunk of tts.stream("Hello world!")) {
|
|
3361
|
+
* playbackPipeline.feedBuffer(chunk.audio);
|
|
3362
|
+
* }
|
|
3363
|
+
* ```
|
|
3364
|
+
*
|
|
3365
|
+
* @example With PlaybackPipeline
|
|
3366
|
+
* ```typescript
|
|
3367
|
+
* const speaker = new TTSSpeaker();
|
|
3368
|
+
* await speaker.connect(tts, { lam: createA2E() });
|
|
3369
|
+
* await speaker.speak("Hello!");
|
|
3370
|
+
* ```
|
|
3371
|
+
*/
|
|
3372
|
+
|
|
3373
|
+
interface ElevenLabsConfig {
|
|
3374
|
+
/** ElevenLabs API key */
|
|
3375
|
+
apiKey: string;
|
|
3376
|
+
/** Voice ID to use */
|
|
3377
|
+
voiceId: string;
|
|
3378
|
+
/** Model ID (default: 'eleven_multilingual_v2') */
|
|
3379
|
+
model?: string;
|
|
3380
|
+
/**
|
|
3381
|
+
* Output format (default: 'pcm_16000').
|
|
3382
|
+
* Use 'pcm_16000' for lip sync compatibility (16kHz matches A2E input).
|
|
3383
|
+
* Other options: 'pcm_22050', 'pcm_24000', 'pcm_44100'
|
|
3384
|
+
*/
|
|
3385
|
+
outputFormat?: string;
|
|
3386
|
+
/** Voice stability 0-1 (default: 0.5) */
|
|
3387
|
+
stability?: number;
|
|
3388
|
+
/** Voice similarity boost 0-1 (default: 0.75) */
|
|
3389
|
+
similarityBoost?: number;
|
|
3390
|
+
/** API base URL override (default: 'https://api.elevenlabs.io') */
|
|
3391
|
+
baseUrl?: string;
|
|
3392
|
+
}
|
|
3393
|
+
declare class ElevenLabsTTSBackend implements TTSBackend {
|
|
3394
|
+
private readonly apiKey;
|
|
3395
|
+
private readonly voiceId;
|
|
3396
|
+
private readonly model;
|
|
3397
|
+
private readonly outputFormat;
|
|
3398
|
+
private readonly stability;
|
|
3399
|
+
private readonly similarityBoost;
|
|
3400
|
+
private readonly baseUrl;
|
|
3401
|
+
private readonly _sampleRate;
|
|
3402
|
+
private _isLoaded;
|
|
3403
|
+
constructor(config: ElevenLabsConfig);
|
|
3404
|
+
get sampleRate(): number;
|
|
3405
|
+
get isLoaded(): boolean;
|
|
3406
|
+
/**
|
|
3407
|
+
* No-op for cloud TTS (no model to load).
|
|
3408
|
+
* Marks backend as ready.
|
|
3409
|
+
*/
|
|
3410
|
+
load(): Promise<void>;
|
|
3411
|
+
/**
|
|
3412
|
+
* Stream audio from ElevenLabs for the given text.
|
|
3413
|
+
*
|
|
3414
|
+
* Uses the streaming endpoint. Yields a single chunk for non-streaming
|
|
3415
|
+
* or multiple chunks as response data arrives.
|
|
3416
|
+
*/
|
|
3417
|
+
stream(text: string, options?: TTSStreamOptions): AsyncGenerator<TTSChunk>;
|
|
3418
|
+
dispose(): Promise<void>;
|
|
3419
|
+
private getHttpErrorMessage;
|
|
3420
|
+
}
|
|
3421
|
+
|
|
3422
|
+
/**
|
|
3423
|
+
* AWS Polly TTS Backend — Cloud text-to-speech via consumer-provided AWS SDK call.
|
|
3424
|
+
*
|
|
3425
|
+
* Implements the TTSBackend interface. Keeps @omote/core free of AWS SDK dependencies
|
|
3426
|
+
* by delegating the actual Polly API call to a consumer-provided function.
|
|
3427
|
+
*
|
|
3428
|
+
* @category Inference
|
|
3429
|
+
*
|
|
3430
|
+
* @example Basic usage with AWS SDK v3
|
|
3431
|
+
* ```typescript
|
|
3432
|
+
* import { PollyTTSBackend } from '@omote/core';
|
|
3433
|
+
* import { PollyClient, SynthesizeSpeechCommand } from '@aws-sdk/client-polly';
|
|
3434
|
+
*
|
|
3435
|
+
* const polly = new PollyClient({ region: 'us-east-1' });
|
|
3436
|
+
*
|
|
3437
|
+
* const tts = new PollyTTSBackend({
|
|
3438
|
+
* synthesizeFn: async (text, voice, sampleRate) => {
|
|
3439
|
+
* const cmd = new SynthesizeSpeechCommand({
|
|
3440
|
+
* Text: text,
|
|
3441
|
+
* VoiceId: voice,
|
|
3442
|
+
* Engine: 'neural',
|
|
3443
|
+
* OutputFormat: 'pcm',
|
|
3444
|
+
* SampleRate: String(sampleRate),
|
|
3445
|
+
* });
|
|
3446
|
+
* const result = await polly.send(cmd);
|
|
3447
|
+
* const stream = result.AudioStream;
|
|
3448
|
+
* // Convert stream to ArrayBuffer (Node or browser)
|
|
3449
|
+
* const chunks: Uint8Array[] = [];
|
|
3450
|
+
* for await (const chunk of stream as AsyncIterable<Uint8Array>) {
|
|
3451
|
+
* chunks.push(chunk);
|
|
3452
|
+
* }
|
|
3453
|
+
* const totalLength = chunks.reduce((sum, c) => sum + c.length, 0);
|
|
3454
|
+
* const merged = new Uint8Array(totalLength);
|
|
3455
|
+
* let offset = 0;
|
|
3456
|
+
* for (const chunk of chunks) {
|
|
3457
|
+
* merged.set(chunk, offset);
|
|
3458
|
+
* offset += chunk.length;
|
|
3459
|
+
* }
|
|
3460
|
+
* return {
|
|
3461
|
+
* audio: merged.buffer,
|
|
3462
|
+
* contentType: result.ContentType ?? 'audio/pcm',
|
|
3463
|
+
* };
|
|
3464
|
+
* },
|
|
3465
|
+
* });
|
|
3466
|
+
*
|
|
3467
|
+
* await tts.load();
|
|
3468
|
+
* for await (const chunk of tts.stream("Hello world!")) {
|
|
3469
|
+
* playbackPipeline.feedBuffer(chunk.audio);
|
|
3470
|
+
* }
|
|
3471
|
+
* ```
|
|
3472
|
+
*/
|
|
3473
|
+
|
|
3474
|
+
/**
|
|
3475
|
+
* Result from the consumer-provided synthesize function.
|
|
3476
|
+
*/
|
|
3477
|
+
interface PollySynthesizeResult {
|
|
3478
|
+
/** Raw PCM audio bytes (Int16 LE) */
|
|
3479
|
+
audio: ArrayBuffer;
|
|
3480
|
+
/** Content type from Polly response (e.g., 'audio/pcm') */
|
|
3481
|
+
contentType: string;
|
|
3482
|
+
}
|
|
3483
|
+
/**
|
|
3484
|
+
* Configuration for PollyTTSBackend.
|
|
3485
|
+
*
|
|
3486
|
+
* The `synthesizeFn` callback lets consumers use their own AWS SDK setup
|
|
3487
|
+
* (credentials, region, SDK version) without @omote/core depending on `@aws-sdk/client-polly`.
|
|
3488
|
+
*/
|
|
3489
|
+
interface PollyConfig {
|
|
3490
|
+
/**
|
|
3491
|
+
* Consumer-provided function that calls AWS Polly.
|
|
3492
|
+
* Must return PCM audio (Int16 LE) at the requested sample rate.
|
|
3493
|
+
*
|
|
3494
|
+
* @param text - Text to synthesize
|
|
3495
|
+
* @param voice - Polly voice ID (e.g., 'Joanna')
|
|
3496
|
+
* @param sampleRate - Requested output sample rate (e.g., 16000)
|
|
3497
|
+
* @returns PCM audio buffer and content type
|
|
3498
|
+
*/
|
|
3499
|
+
synthesizeFn: (text: string, voice: string, sampleRate: number) => Promise<PollySynthesizeResult>;
|
|
3500
|
+
/** Polly voice ID (default: 'Joanna') */
|
|
3501
|
+
voice?: string;
|
|
3502
|
+
/** Output sample rate in Hz (default: 16000) */
|
|
3503
|
+
sampleRate?: number;
|
|
3504
|
+
/** Polly engine type (default: 'neural') */
|
|
3505
|
+
engine?: 'neural' | 'standard' | 'generative' | 'long-form';
|
|
3506
|
+
}
|
|
3507
|
+
declare class PollyTTSBackend implements TTSBackend {
|
|
3508
|
+
private readonly synthesizeFn;
|
|
3509
|
+
private readonly voice;
|
|
3510
|
+
private readonly _sampleRate;
|
|
3511
|
+
private readonly engine;
|
|
3512
|
+
private _isLoaded;
|
|
3513
|
+
constructor(config: PollyConfig);
|
|
3514
|
+
get sampleRate(): number;
|
|
3515
|
+
get isLoaded(): boolean;
|
|
3516
|
+
/**
|
|
3517
|
+
* No-op for cloud TTS (no model to load).
|
|
3518
|
+
* Marks backend as ready.
|
|
3519
|
+
*/
|
|
3520
|
+
load(): Promise<void>;
|
|
3521
|
+
/**
|
|
3522
|
+
* Synthesize audio via consumer's Polly function.
|
|
3523
|
+
*
|
|
3524
|
+
* Polly's SynthesizeSpeech is request/response (not streaming for PCM),
|
|
3525
|
+
* so this yields a single chunk per call. For long text, consider splitting
|
|
3526
|
+
* into sentences on the consumer side.
|
|
3527
|
+
*/
|
|
3528
|
+
stream(text: string, options?: TTSStreamOptions): AsyncGenerator<TTSChunk>;
|
|
3529
|
+
dispose(): Promise<void>;
|
|
3530
|
+
}
|
|
3531
|
+
|
|
3328
3532
|
/**
|
|
3329
3533
|
* ORT CDN configuration
|
|
3330
3534
|
*
|
|
@@ -3889,6 +4093,12 @@ declare const MetricNames: {
|
|
|
3889
4093
|
readonly CACHE_HITS: "omote.cache.hits";
|
|
3890
4094
|
/** Counter: Cache misses */
|
|
3891
4095
|
readonly CACHE_MISSES: "omote.cache.misses";
|
|
4096
|
+
/** Counter: Cache stale (version/etag mismatch) */
|
|
4097
|
+
readonly CACHE_STALE: "omote.cache.stale";
|
|
4098
|
+
/** Counter: Cache quota warning (>90% used) */
|
|
4099
|
+
readonly CACHE_QUOTA_WARNING: "omote.cache.quota_warning";
|
|
4100
|
+
/** Counter: Cache eviction (LRU) */
|
|
4101
|
+
readonly CACHE_EVICTION: "omote.cache.eviction";
|
|
3892
4102
|
/** Histogram: VoicePipeline turn latency (speech end → transcript ready, excludes playback) */
|
|
3893
4103
|
readonly VOICE_TURN_LATENCY: "omote.voice.turn.latency";
|
|
3894
4104
|
/** Histogram: ASR transcription latency in ms */
|
|
@@ -5517,6 +5727,7 @@ declare class VoicePipeline extends EventEmitter<VoicePipelineEvents> {
|
|
|
5517
5727
|
private lastProgressiveResult;
|
|
5518
5728
|
private lastProgressiveSamples;
|
|
5519
5729
|
private asrErrorCount;
|
|
5730
|
+
private progressiveErrorCount;
|
|
5520
5731
|
private responseAbortController;
|
|
5521
5732
|
private _unsubChunk;
|
|
5522
5733
|
private _unsubLevel;
|
|
@@ -5645,4 +5856,4 @@ declare class VoiceOrchestrator extends EventEmitter<VoiceOrchestratorEvents> {
|
|
|
5645
5856
|
private setState;
|
|
5646
5857
|
}
|
|
5647
5858
|
|
|
5648
|
-
export { type A2EBackend, A2EInference, type A2EInferenceConfig, type A2EModelInfo, A2EProcessor, type A2EProcessorConfig, type A2EResult, A2EUnifiedAdapter, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, type AnimationController, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationSource, type AnimationSourceOptions, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, type BoneFilterConfig, type CacheConfig, type CacheSpanAttributes, CharacterController, type CharacterControllerConfig, type CharacterProfile, type CharacterUpdateInput, type CharacterUpdateOutput, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateKokoroTTSConfig, type CreateSenseVoiceConfig, type CreateTTSPlayerConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_BONE_FILTER, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, EXPLICIT_EMOTION_COUNT, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, type ErrorType, ErrorTypes, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FrameSource, type FullFaceFrame, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceFactoryConfig, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, KOKORO_VOICES, type KokoroStreamChunk, type KokoroTTSConfig, KokoroTTSInference, type KokoroTTSModelInfo, type KokoroTTSResult, KokoroTTSUnifiedAdapter, KokoroTTSWorker, type KokoroVoiceName, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MIXAMO_PREFIX, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PRESERVE_POSITION_BONES, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, ProceduralLifeLayer, type Quat, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, SpeechListener, type SpeechListenerConfig, type SpeechListenerEvents, type SpeechListenerState, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type SynthesizeOptions, type TTSBackend, type TTSChunk, TTSPlayback, type TTSPlaybackConfig, type TTSPlaybackEvents, TTSPlayer, TTSSpeaker, type TTSSpeakerConfig, type TTSStreamOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TrackDescriptor, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Vec3, VoiceOrchestrator, type VoiceOrchestratorCloudConfig, type VoiceOrchestratorConfig, type VoiceOrchestratorEvents, type VoiceOrchestratorLocalConfig, VoicePipeline, type VoicePipelineCloudConfig, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineLocalConfig, type VoicePipelineState, A2EInference as Wav2Vec2Inference, type WorkerHealthState, analyzeTextEmotion, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureOrtCdn, configureTelemetry, createA2E, createEmotionVector, createKokoroTTS, createSenseVoice, createSileroVAD, createTTSPlayer, fetchWithCache, float32ToPcm16, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getOrtCdnBase, getRecommendedBackend, getTelemetry, hasWebGPUApi, int16ToFloat32, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, listVoices as listKokoroVoices, parseEmotionTags, pcm16ToFloat32, preloadModels, resampleLinear, resetModelUrls, resolveBackend, resolveEmotion, shouldEnableWasmProxy, shouldKeepTrack, shouldUseNativeASR, shouldUseServerA2E, stripMixamoPrefix, supportsVADWorker, ttsToPlaybackFormat, validateTTSInput };
|
|
5859
|
+
export { type A2EBackend, A2EInference, type A2EInferenceConfig, type A2EModelInfo, A2EProcessor, type A2EProcessorConfig, type A2EResult, A2EUnifiedAdapter, ALL_AUS, ARKIT_BLENDSHAPES, type AUActivation, AU_TO_ARKIT, type ActiveSpan, type AnimationClip, type AnimationController, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationSource, type AnimationSourceOptions, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, type BoneFilterConfig, type CacheConfig, type CacheSpanAttributes, CharacterController, type CharacterControllerConfig, type CharacterProfile, type CharacterUpdateInput, type CharacterUpdateOutput, ConsoleExporter, type ConversationalState, type CreateA2EConfig, type CreateKokoroTTSConfig, type CreateSenseVoiceConfig, type CreateTTSPlayerConfig, DEFAULT_ANIMATION_CONFIG, DEFAULT_BONE_FILTER, DEFAULT_MODEL_URLS, EMOTION_NAMES, EMOTION_TO_AU, EMOTION_VECTOR_SIZE, EXPLICIT_EMOTION_COUNT, type ElevenLabsConfig, ElevenLabsTTSBackend, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, EmotionResolver, type EmotionWeights, EmphasisDetector, type ErrorType, ErrorTypes, EventEmitter, type ExpressionProfile, FaceCompositor, type FaceCompositorConfig, type FaceCompositorInput, type FaceCompositorOutput, type FetchWithCacheOptions, type FrameSource, type FullFaceFrame, HF_CDN_URLS, INFERENCE_LATENCY_BUCKETS, type InferenceFactoryConfig, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, KOKORO_VOICES, type KokoroStreamChunk, type KokoroTTSConfig, KokoroTTSInference, type KokoroTTSModelInfo, type KokoroTTSResult, KokoroTTSUnifiedAdapter, KokoroTTSWorker, type KokoroVoiceName, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, type LoadingProgress, MIXAMO_PREFIX, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicLipSync, type MicLipSyncConfig, type MicLipSyncEvents, type MicLipSyncFrame, type MicLipSyncState, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, type ModelUrlKey, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, PRESERVE_POSITION_BONES, PlaybackPipeline, type PlaybackPipelineConfig, type PlaybackPipelineEvents, type PlaybackState, type PollyConfig, type PollySynthesizeResult, PollyTTSBackend, ProceduralLifeLayer, type Quat, type QuotaInfo, type ResolvedEmotion, type ResponseHandler, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, SpeechListener, type SpeechListenerConfig, type SpeechListenerEvents, type SpeechListenerState, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type SynthesizeOptions, type TTSBackend, type TTSChunk, TTSPlayback, type TTSPlaybackConfig, type TTSPlaybackEvents, TTSPlayer, TTSSpeaker, type TTSSpeakerConfig, type TTSStreamOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TrackDescriptor, type TranscriptResult, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Vec3, VoiceOrchestrator, type VoiceOrchestratorCloudConfig, type VoiceOrchestratorConfig, type VoiceOrchestratorEvents, type VoiceOrchestratorLocalConfig, VoicePipeline, type VoicePipelineCloudConfig, type VoicePipelineConfig, type VoicePipelineEvents, type VoicePipelineLocalConfig, type VoicePipelineState, A2EInference as Wav2Vec2Inference, type WorkerHealthState, analyzeTextEmotion, applyProfile, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureModelUrls, configureOrtCdn, configureTelemetry, createA2E, createEmotionVector, createKokoroTTS, createSenseVoice, createSileroVAD, createTTSPlayer, fetchWithCache, float32ToPcm16, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getOrtCdnBase, getRecommendedBackend, getTelemetry, hasWebGPUApi, int16ToFloat32, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, listVoices as listKokoroVoices, parseEmotionTags, pcm16ToFloat32, preloadModels, resampleLinear, resetModelUrls, resolveBackend, resolveEmotion, shouldEnableWasmProxy, shouldKeepTrack, shouldUseNativeASR, shouldUseServerA2E, stripMixamoPrefix, supportsVADWorker, ttsToPlaybackFormat, validateTTSInput };
|