@omote/core 0.5.2 → 0.5.4

This diff shows the changes between publicly released versions of this package in one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.js';
2
- export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
1
+ import { EventEmitter, OmoteEvents } from './events/index.js';
2
+ export { AnimationEvent, BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.js';
3
3
  export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.js';
4
4
  export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
5
5
 
@@ -587,18 +587,7 @@ interface FullFacePipelineOptions {
587
587
  /** Per-character expression weight scaling */
588
588
  profile?: ExpressionProfile;
589
589
  /**
590
- * Spring smoothing halflife in seconds.
591
- * Controls how quickly blendshapes converge to inference targets.
592
- * Lower = snappier but more jittery. Higher = smoother but laggy.
593
- * Set to 0 to disable smoothing (raw frame pass-through).
594
- *
595
- * Default: 0.06 (60ms)
596
- */
597
- smoothingHalflife?: number;
598
- /**
599
- * Time in ms with no new inference frames before decaying to neutral.
600
- * When exceeded, spring targets are set to 0 and the face smoothly
601
- * relaxes rather than freezing on the last frame.
590
+ * Time in ms with no new inference frames before logging a stale warning.
602
591
  *
603
592
  * Must be larger than the inter-batch gap (chunkSize/sampleRate + inference time).
604
593
  * Default: 2000
@@ -646,7 +635,6 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
646
635
  private scheduler;
647
636
  private coalescer;
648
637
  private processor;
649
- private smoother;
650
638
  private playbackStarted;
651
639
  private monitorInterval;
652
640
  private frameAnimationId;
@@ -654,7 +642,6 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
654
642
  private lastKnownLamFrame;
655
643
  private staleWarningEmitted;
656
644
  private readonly staleThresholdMs;
657
- private lastFrameLoopTime;
658
645
  private frameLoopCount;
659
646
  private profile;
660
647
  constructor(options: FullFacePipelineOptions);
@@ -694,10 +681,9 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
694
681
  /**
695
682
  * Start frame animation loop
696
683
  *
697
- * Uses critically damped spring smoother to produce continuous output
698
- * at render rate (60fps), even between inference batches (~30fps bursts).
699
- * Springs interpolate toward the latest inference target, and decay
700
- * to neutral when inference stalls.
684
+ * Polls A2EProcessor at render rate (60fps) for the latest inference frame
685
+ * matching the current AudioContext time. Between inference batches (~30fps
686
+ * bursts), getFrameForTime() holds the last frame.
701
687
  */
702
688
  private startFrameLoop;
703
689
  /**
@@ -733,6 +719,78 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
733
719
  dispose(): void;
734
720
  }
735
721
 
722
+ /**
723
+ * Interruption Handler
724
+ *
725
+ * VAD-based barge-in detection for AI conversations:
726
+ * - Monitors VAD probability for user speech
727
+ * - Detects when user interrupts AI response
728
+ * - Triggers interruption callbacks
729
+ */
730
+
731
+ interface InterruptionEvents {
732
+ [key: string]: unknown;
733
+ 'speech.detected': {
734
+ rms: number;
735
+ };
736
+ 'speech.ended': {
737
+ durationMs: number;
738
+ };
739
+ 'interruption.triggered': {
740
+ rms: number;
741
+ durationMs: number;
742
+ };
743
+ }
744
+ /**
745
+ * Interruption handler configuration
746
+ *
747
+ * Industry standards applied:
748
+ * - vadThreshold: 0.5 (Silero VAD default)
749
+ * - minSpeechDurationMs: 200ms (Google/Amazon barge-in standard)
750
+ * - silenceTimeoutMs: 500ms (OpenAI Realtime API standard)
751
+ */
752
+ interface InterruptionConfig {
753
+ /** VAD probability threshold for speech detection (default: 0.5, Silero standard) */
754
+ vadThreshold?: number;
755
+ /** Minimum speech duration to trigger interruption (default: 200ms, Google/Amazon standard) */
756
+ minSpeechDurationMs?: number;
757
+ /** Silence duration to end speech (default: 500ms, OpenAI standard) */
758
+ silenceTimeoutMs?: number;
759
+ /** Enable interruption detection (default: true) */
760
+ enabled?: boolean;
761
+ }
762
+ declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
763
+ private config;
764
+ private isSpeaking;
765
+ private speechStartTime;
766
+ private lastSpeechTime;
767
+ private silenceTimer;
768
+ private aiIsSpeaking;
769
+ private interruptionTriggeredThisSession;
770
+ constructor(config?: InterruptionConfig);
771
+ /**
772
+ * Process VAD result for interruption detection
773
+ * @param vadProbability - Speech probability from VAD (0-1)
774
+ * @param audioEnergy - Optional RMS energy for logging (default: 0)
775
+ */
776
+ processVADResult(vadProbability: number, audioEnergy?: number): void;
777
+ /** Notify that AI started/stopped speaking */
778
+ setAISpeaking(speaking: boolean): void;
779
+ /** Enable/disable interruption detection */
780
+ setEnabled(enabled: boolean): void;
781
+ /** Update configuration */
782
+ updateConfig(config: Partial<InterruptionConfig>): void;
783
+ /** Reset state */
784
+ reset(): void;
785
+ /** Get current state */
786
+ getState(): {
787
+ isSpeaking: boolean;
788
+ speechDurationMs: number;
789
+ };
790
+ private onSpeechDetected;
791
+ private onSilenceDetected;
792
+ }
793
+
736
794
  /**
737
795
  * Lazy ONNX Runtime loader with conditional WebGPU/WASM bundle loading
738
796
  *
@@ -2835,813 +2893,6 @@ declare class EmotionController {
2835
2893
  reset(): void;
2836
2894
  }
2837
2895
 
2838
- /**
2839
- * AI Adapter Interface
2840
- *
2841
- * Common interface for AI backends (AWS AgentCore, OpenAI Realtime).
2842
- * Adapters handle the conversation flow and emit events for animation.
2843
- *
2844
- * @category AI
2845
- */
2846
-
2847
- /**
2848
- * Tenant configuration for multi-tenant isolation
2849
- */
2850
- interface TenantConfig {
2851
- /** Unique tenant identifier */
2852
- tenantId: string;
2853
- /** Customer-specific API credentials */
2854
- credentials: {
2855
- apiKey?: string;
2856
- authToken?: string;
2857
- refreshToken?: string;
2858
- };
2859
- /** Character configuration for this tenant */
2860
- characterId: string;
2861
- /** Optional custom endpoint override */
2862
- endpoint?: string;
2863
- }
2864
- /**
2865
- * Voice configuration for TTS
2866
- */
2867
- interface VoiceConfig {
2868
- /** TTS provider */
2869
- provider: 'elevenlabs' | 'openai';
2870
- /** Voice ID */
2871
- voiceId: string;
2872
- /** Stability (0-1, ElevenLabs) */
2873
- stability?: number;
2874
- /** Similarity boost (0-1, ElevenLabs) */
2875
- similarityBoost?: number;
2876
- }
2877
- /**
2878
- * Session configuration
2879
- */
2880
- interface SessionConfig {
2881
- /** Session ID (generated or provided) */
2882
- sessionId: string;
2883
- /** Tenant this session belongs to */
2884
- tenant: TenantConfig;
2885
- /** Initial system prompt / personality */
2886
- systemPrompt?: string;
2887
- /** Voice configuration for TTS */
2888
- voice?: VoiceConfig;
2889
- /** Initial emotion state */
2890
- emotion?: string;
2891
- /** Language code */
2892
- language?: string;
2893
- }
2894
- /**
2895
- * Message role in conversation
2896
- */
2897
- type MessageRole = 'user' | 'assistant' | 'system';
2898
- /**
2899
- * Conversation message in session history
2900
- */
2901
- interface ConversationMessage {
2902
- /** Message role */
2903
- role: MessageRole;
2904
- /** Text content */
2905
- content: string;
2906
- /** Timestamp (ms) */
2907
- timestamp: number;
2908
- /** Emotion detected/expressed */
2909
- emotion?: string;
2910
- /** Audio duration if applicable (ms) */
2911
- audioDurationMs?: number;
2912
- }
2913
-
2914
- /**
2915
- * Events emitted by AI adapters
2916
- */
2917
- interface AIAdapterEvents {
2918
- [key: string]: unknown;
2919
- 'state.change': {
2920
- state: AISessionState;
2921
- previousState: AISessionState;
2922
- };
2923
- 'user.speech.start': {
2924
- timestamp: number;
2925
- };
2926
- 'user.speech.end': {
2927
- timestamp: number;
2928
- durationMs: number;
2929
- };
2930
- 'user.transcript.partial': {
2931
- text: string;
2932
- confidence: number;
2933
- };
2934
- 'user.transcript.final': {
2935
- text: string;
2936
- confidence: number;
2937
- };
2938
- 'ai.thinking.start': {
2939
- timestamp: number;
2940
- };
2941
- 'ai.response.start': {
2942
- text?: string;
2943
- emotion?: string;
2944
- };
2945
- 'ai.response.chunk': {
2946
- text: string;
2947
- isLast: boolean;
2948
- };
2949
- 'ai.response.end': {
2950
- fullText: string;
2951
- durationMs: number;
2952
- };
2953
- 'audio.output.chunk': {
2954
- audio: ArrayBuffer;
2955
- sampleRate: number;
2956
- timestamp: number;
2957
- };
2958
- 'audio.output.end': {
2959
- durationMs: number;
2960
- };
2961
- 'animation': AnimationEvent;
2962
- 'memory.updated': {
2963
- messageCount: number;
2964
- tokenCount?: number;
2965
- };
2966
- 'connection.opened': {
2967
- sessionId: string;
2968
- adapter: string;
2969
- };
2970
- 'connection.closed': {
2971
- reason: string;
2972
- };
2973
- 'connection.error': {
2974
- error: Error;
2975
- recoverable: boolean;
2976
- };
2977
- 'interruption.detected': {
2978
- timestamp: number;
2979
- };
2980
- 'interruption.handled': {
2981
- action: 'stop' | 'continue';
2982
- timestamp: number;
2983
- };
2984
- }
2985
- /**
2986
- * Base interface for all AI adapters
2987
- */
2988
- interface AIAdapter {
2989
- /** Adapter name for logging/debugging */
2990
- readonly name: string;
2991
- /** Current session state */
2992
- readonly state: AISessionState;
2993
- /** Current session ID (null if not connected) */
2994
- readonly sessionId: string | null;
2995
- /** Whether the adapter is connected */
2996
- readonly isConnected: boolean;
2997
- /**
2998
- * Initialize and connect the adapter
2999
- */
3000
- connect(config: SessionConfig): Promise<void>;
3001
- /**
3002
- * Disconnect and cleanup
3003
- */
3004
- disconnect(): Promise<void>;
3005
- /**
3006
- * Push user audio for processing
3007
- * @param audio - PCM audio data (16kHz, mono)
3008
- */
3009
- pushAudio(audio: Int16Array | Float32Array): void;
3010
- /**
3011
- * Send text message directly (bypasses STT)
3012
- */
3013
- sendText(text: string): Promise<void>;
3014
- /**
3015
- * Handle user interruption
3016
- * Stops current AI speech and prepares for new input
3017
- */
3018
- interrupt(): void;
3019
- /**
3020
- * Get conversation history
3021
- */
3022
- getHistory(): ConversationMessage[];
3023
- /**
3024
- * Clear conversation history
3025
- */
3026
- clearHistory(): void;
3027
- /**
3028
- * Check if adapter is available/healthy
3029
- */
3030
- healthCheck(): Promise<boolean>;
3031
- on<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): () => void;
3032
- off<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): void;
3033
- once<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): () => void;
3034
- }
3035
-
3036
- /**
3037
- * Conversation Session Interface
3038
- *
3039
- * Represents an active conversation with memory and state.
3040
- *
3041
- * @category AI
3042
- */
3043
-
3044
- /**
3045
- * Serializable session snapshot for persistence
3046
- */
3047
- interface SessionSnapshot {
3048
- /** Session ID */
3049
- sessionId: string;
3050
- /** Tenant ID */
3051
- tenantId: string;
3052
- /** Character ID */
3053
- characterId: string;
3054
- /** Conversation history */
3055
- history: ConversationMessage[];
3056
- /** Custom context */
3057
- context: Record<string, string>;
3058
- /** Emotion state */
3059
- emotion: EmotionWeights;
3060
- /** Creation timestamp */
3061
- createdAt: number;
3062
- /** Last activity timestamp */
3063
- lastActivityAt: number;
3064
- }
3065
- /**
3066
- * Extended session with memory management
3067
- */
3068
- interface ConversationSession {
3069
- /** Session identifier */
3070
- readonly sessionId: string;
3071
- /** Associated AI adapter */
3072
- readonly adapter: AIAdapter;
3073
- /** Session configuration */
3074
- readonly config: SessionConfig;
3075
- /** Current state */
3076
- readonly state: AISessionState;
3077
- /** Conversation history */
3078
- readonly history: ConversationMessage[];
3079
- /** Current emotion state */
3080
- readonly emotion: EmotionWeights;
3081
- /** Session creation timestamp */
3082
- readonly createdAt: number;
3083
- /** Last activity timestamp */
3084
- readonly lastActivityAt: number;
3085
- /**
3086
- * Start the session (connects adapter)
3087
- */
3088
- start(): Promise<void>;
3089
- /**
3090
- * End the session (disconnects adapter)
3091
- */
3092
- end(): Promise<void>;
3093
- /**
3094
- * Push audio input
3095
- */
3096
- pushAudio(audio: Int16Array | Float32Array): void;
3097
- /**
3098
- * Send text input directly
3099
- */
3100
- sendText(text: string): Promise<void>;
3101
- /**
3102
- * Interrupt current AI response
3103
- */
3104
- interrupt(): void;
3105
- /**
3106
- * Update emotion state
3107
- */
3108
- setEmotion(emotion: EmotionWeights): void;
3109
- /**
3110
- * Add a context item (custom memory)
3111
- */
3112
- addContext(key: string, value: string): void;
3113
- /**
3114
- * Remove a context item
3115
- */
3116
- removeContext(key: string): void;
3117
- /**
3118
- * Get all context items
3119
- */
3120
- getContext(): Record<string, string>;
3121
- /**
3122
- * Export session for persistence
3123
- */
3124
- export(): SessionSnapshot;
3125
- /**
3126
- * Import session from snapshot
3127
- */
3128
- import(snapshot: SessionSnapshot): void;
3129
- }
3130
-
3131
- /**
3132
- * AWS AgentCore Adapter
3133
- *
3134
- * Primary AI adapter for the Omote Platform.
3135
- *
3136
- * Pipeline:
3137
- * User Audio -> Whisper ASR (local) -> Text
3138
- * Text -> AgentCore (WebSocket) -> Response Text + Audio chunks (TTS handled backend-side)
3139
- * Audio chunks -> LAM (local) -> Blendshapes -> Render
3140
- *
3141
- * @category AI
3142
- */
3143
-
3144
- /**
3145
- * AgentCore-specific configuration
3146
- */
3147
- interface AgentCoreConfig {
3148
- /** AgentCore WebSocket endpoint */
3149
- endpoint: string;
3150
- /** AWS region */
3151
- region?: string;
3152
- /** Model URLs */
3153
- models?: {
3154
- lamUrl?: string;
3155
- };
3156
- /** Enable observability */
3157
- observability?: {
3158
- tracing?: boolean;
3159
- metrics?: boolean;
3160
- };
3161
- }
3162
- /**
3163
- * AWS AgentCore Adapter
3164
- */
3165
- declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements AIAdapter {
3166
- readonly name = "AgentCore";
3167
- private _state;
3168
- private _sessionId;
3169
- private _isConnected;
3170
- private asr;
3171
- private vad;
3172
- private lam;
3173
- private emotionController;
3174
- private pipeline;
3175
- private ws;
3176
- private wsReconnectAttempts;
3177
- private readonly maxReconnectAttempts;
3178
- private audioBuffer;
3179
- private history;
3180
- private currentConfig;
3181
- private agentCoreConfig;
3182
- private isSpeaking;
3183
- private currentTtsAbortController;
3184
- private tokenCache;
3185
- constructor(config: AgentCoreConfig);
3186
- get state(): AISessionState;
3187
- get sessionId(): string | null;
3188
- get isConnected(): boolean;
3189
- /**
3190
- * Connect to AgentCore with session configuration
3191
- */
3192
- connect(config: SessionConfig): Promise<void>;
3193
- /**
3194
- * Disconnect and cleanup
3195
- */
3196
- disconnect(): Promise<void>;
3197
- /**
3198
- * Push user audio for processing
3199
- */
3200
- pushAudio(audio: Int16Array | Float32Array): void;
3201
- /**
3202
- * Send text directly to AgentCore
3203
- */
3204
- sendText(text: string): Promise<void>;
3205
- /**
3206
- * Interrupt current AI response
3207
- */
3208
- interrupt(): void;
3209
- getHistory(): ConversationMessage[];
3210
- clearHistory(): void;
3211
- healthCheck(): Promise<boolean>;
3212
- private setState;
3213
- private getAuthToken;
3214
- private initASR;
3215
- private initLAM;
3216
- private initPipeline;
3217
- private connectWebSocket;
3218
- private handleAgentCoreMessage;
3219
- private scheduleTranscription;
3220
- /**
3221
- * Detect voice activity using Silero VAD
3222
- * Falls back to simple RMS if VAD not available
3223
- */
3224
- private detectVoiceActivity;
3225
- private base64ToArrayBuffer;
3226
- private addToHistory;
3227
- private handleDisconnect;
3228
- }
3229
-
3230
- /**
3231
- * Conversation Orchestrator
3232
- *
3233
- * Manages the conversation pipeline with AgentCore:
3234
- * - Handles session lifecycle and tenant isolation
3235
- * - Manages adapter events and state
3236
- *
3237
- * @category AI
3238
- */
3239
-
3240
- /**
3241
- * Orchestrator configuration
3242
- */
3243
- interface OrchestratorConfig {
3244
- /** AgentCore adapter config */
3245
- adapter: AgentCoreConfig;
3246
- /** Connection timeout in ms */
3247
- connectionTimeoutMs?: number;
3248
- /** Max retry attempts */
3249
- maxRetries?: number;
3250
- }
3251
- /**
3252
- * Orchestrator events (extends AI adapter events)
3253
- */
3254
- interface OrchestratorEvents extends AIAdapterEvents {
3255
- 'session.created': {
3256
- sessionId: string;
3257
- tenantId: string;
3258
- };
3259
- 'session.ended': {
3260
- sessionId: string;
3261
- reason: string;
3262
- };
3263
- }
3264
- /**
3265
- * Conversation Orchestrator
3266
- */
3267
- declare class ConversationOrchestrator extends EventEmitter<OrchestratorEvents> {
3268
- private config;
3269
- private adapter;
3270
- private sessions;
3271
- private tenants;
3272
- private healthCheckInterval;
3273
- private readonly HEALTH_CHECK_INTERVAL_MS;
3274
- constructor(config: OrchestratorConfig);
3275
- /**
3276
- * Register a tenant
3277
- */
3278
- registerTenant(tenant: TenantConfig): void;
3279
- /**
3280
- * Unregister a tenant
3281
- */
3282
- unregisterTenant(tenantId: string): void;
3283
- /**
3284
- * Get tenant config
3285
- */
3286
- getTenant(tenantId: string): TenantConfig | undefined;
3287
- /**
3288
- * Create a new conversation session for a tenant
3289
- */
3290
- createSession(tenantId: string, options?: Partial<SessionConfig>): Promise<ConversationSession>;
3291
- /**
3292
- * End a session
3293
- */
3294
- endSession(sessionId: string): Promise<void>;
3295
- /**
3296
- * Get session by ID
3297
- */
3298
- getSession(sessionId: string): ConversationSession | undefined;
3299
- /**
3300
- * Get all sessions for a tenant
3301
- */
3302
- getTenantSessions(tenantId: string): ConversationSession[];
3303
- /**
3304
- * Start health monitoring
3305
- */
3306
- startHealthMonitoring(): void;
3307
- /**
3308
- * Stop health monitoring
3309
- */
3310
- stopHealthMonitoring(): void;
3311
- /**
3312
- * Dispose all resources
3313
- */
3314
- dispose(): Promise<void>;
3315
- private generateSessionId;
3316
- private forwardAdapterEvents;
3317
- private performHealthCheck;
3318
- }
3319
-
3320
- /**
3321
- * Tenant Manager
3322
- *
3323
- * Handles multi-tenant isolation for the Omote Platform:
3324
- * - Credential isolation per tenant
3325
- * - Session scoping per tenant
3326
- * - Quota management
3327
- * - Token refresh
3328
- *
3329
- * @category AI
3330
- */
3331
-
3332
- /**
3333
- * Tenant quota configuration
3334
- */
3335
- interface TenantQuota {
3336
- /** Max concurrent sessions */
3337
- maxSessions: number;
3338
- /** Requests per minute */
3339
- requestsPerMinute: number;
3340
- /** Max tokens per conversation */
3341
- maxTokensPerConversation: number;
3342
- /** Max audio minutes per day */
3343
- maxAudioMinutesPerDay: number;
3344
- }
3345
- /**
3346
- * Tenant usage tracking
3347
- */
3348
- interface TenantUsage {
3349
- /** Current active sessions */
3350
- currentSessions: number;
3351
- /** Requests in current minute */
3352
- requestsThisMinute: number;
3353
- /** Total tokens used */
3354
- tokensUsed: number;
3355
- /** Audio minutes used today */
3356
- audioMinutesToday: number;
3357
- /** Last reset timestamp */
3358
- lastMinuteReset: number;
3359
- /** Last daily reset timestamp */
3360
- lastDailyReset: number;
3361
- }
3362
- /**
3363
- * Token refresh callback
3364
- */
3365
- type TokenRefreshCallback = () => Promise<string>;
3366
- /**
3367
- * Tenant Manager
3368
- */
3369
- declare class TenantManager {
3370
- private tenants;
3371
- private quotas;
3372
- private usage;
3373
- private tokenRefreshCallbacks;
3374
- /**
3375
- * Default quota for new tenants
3376
- */
3377
- static readonly DEFAULT_QUOTA: TenantQuota;
3378
- /**
3379
- * Register a tenant with quota
3380
- */
3381
- register(tenant: TenantConfig, quota?: TenantQuota, tokenRefreshCallback?: TokenRefreshCallback): void;
3382
- /**
3383
- * Unregister a tenant
3384
- */
3385
- unregister(tenantId: string): void;
3386
- /**
3387
- * Get tenant config
3388
- */
3389
- get(tenantId: string): TenantConfig | undefined;
3390
- /**
3391
- * Check if tenant exists
3392
- */
3393
- has(tenantId: string): boolean;
3394
- /**
3395
- * Get all tenant IDs
3396
- */
3397
- getTenantIds(): string[];
3398
- /**
3399
- * Check if tenant can create new session
3400
- */
3401
- canCreateSession(tenantId: string): boolean;
3402
- /**
3403
- * Check if tenant can make request
3404
- */
3405
- canMakeRequest(tenantId: string): boolean;
3406
- /**
3407
- * Check if tenant can use audio
3408
- */
3409
- canUseAudio(tenantId: string, minutes: number): boolean;
3410
- /**
3411
- * Increment session count
3412
- */
3413
- incrementSessions(tenantId: string): void;
3414
- /**
3415
- * Decrement session count
3416
- */
3417
- decrementSessions(tenantId: string): void;
3418
- /**
3419
- * Record a request
3420
- */
3421
- recordRequest(tenantId: string): void;
3422
- /**
3423
- * Record token usage
3424
- */
3425
- recordTokens(tenantId: string, tokens: number): void;
3426
- /**
3427
- * Record audio usage
3428
- */
3429
- recordAudioMinutes(tenantId: string, minutes: number): void;
3430
- /**
3431
- * Get fresh auth token for tenant
3432
- */
3433
- getAuthToken(tenantId: string): Promise<string>;
3434
- /**
3435
- * Update tenant credentials
3436
- */
3437
- updateCredentials(tenantId: string, credentials: Partial<TenantConfig['credentials']>): void;
3438
- /**
3439
- * Get usage stats for tenant
3440
- */
3441
- getUsage(tenantId: string): TenantUsage | undefined;
3442
- /**
3443
- * Get quota for tenant
3444
- */
3445
- getQuota(tenantId: string): TenantQuota | undefined;
3446
- /**
3447
- * Update quota for tenant
3448
- */
3449
- updateQuota(tenantId: string, quota: Partial<TenantQuota>): void;
3450
- /**
3451
- * Reset all usage stats for a tenant
3452
- */
3453
- resetUsage(tenantId: string): void;
3454
- private checkMinuteReset;
3455
- private checkDailyReset;
3456
- }
3457
-
3458
- /**
3459
- * Audio Sync Manager
3460
- *
3461
- * Synchronizes TTS audio playback with lip sync animation:
3462
- * - Buffers audio for inference
3463
- * - Manages playback timing
3464
- * - Handles audio queue for streaming
3465
- *
3466
- * @category AI
3467
- */
3468
-
3469
- /**
3470
- * Audio sync events
3471
- */
3472
- interface AudioSyncEvents {
3473
- [key: string]: unknown;
3474
- 'buffer.ready': {
3475
- audio: Float32Array;
3476
- };
3477
- 'playback.start': Record<string, never>;
3478
- 'playback.end': Record<string, never>;
3479
- 'sync.drift': {
3480
- driftMs: number;
3481
- };
3482
- }
3483
- /**
3484
- * Audio sync configuration
3485
- */
3486
- interface AudioSyncConfig {
3487
- /** Target sample rate (default: 16000) */
3488
- sampleRate?: number;
3489
- /** Buffer size for inference (default: 16640) */
3490
- bufferSize?: number;
3491
- /** Overlap between buffers (default: 4160) */
3492
- overlapSize?: number;
3493
- /** Max drift before correction (default: 100ms) */
3494
- maxDriftMs?: number;
3495
- }
3496
- /**
3497
- * Audio Sync Manager
3498
- */
3499
- declare class AudioSyncManager extends EventEmitter<AudioSyncEvents> {
3500
- private config;
3501
- private audioBuffer;
3502
- private bufferPosition;
3503
- private playbackQueue;
3504
- private isPlaying;
3505
- private audioContext;
3506
- private playbackStartTime;
3507
- private samplesPlayed;
3508
- constructor(config?: AudioSyncConfig);
3509
- /**
3510
- * Initialize audio context
3511
- */
3512
- initialize(): Promise<void>;
3513
- /**
3514
- * Push audio chunk for processing and playback
3515
- */
3516
- pushAudio(audio: Float32Array): void;
3517
- /**
3518
- * Buffer audio for inference
3519
- */
3520
- private bufferForInference;
3521
- /**
3522
- * Start audio playback
3523
- */
3524
- private startPlayback;
3525
- /**
3526
- * Process playback queue
3527
- */
3528
- private processPlaybackQueue;
3529
- /**
3530
- * Check for audio/animation drift
3531
- */
3532
- private checkDrift;
3533
- /**
3534
- * Clear playback queue
3535
- */
3536
- clearQueue(): void;
3537
- /**
3538
- * Stop playback
3539
- */
3540
- stop(): void;
3541
- /**
3542
- * Get current playback position in seconds
3543
- */
3544
- getPlaybackPosition(): number;
3545
- /**
3546
- * Check if currently playing
3547
- */
3548
- getIsPlaying(): boolean;
3549
- /**
3550
- * Dispose resources
3551
- */
3552
- dispose(): void;
3553
- }
3554
-
3555
- /**
3556
- * Interruption Handler
3557
- *
3558
- * VAD-based interruption detection for AI conversations:
3559
- * - Monitors user audio for speech
3560
- * - Detects when user interrupts AI response
3561
- * - Triggers interruption callbacks
3562
- *
3563
- * @category AI
3564
- */
3565
-
3566
- /**
3567
- * Interruption events
3568
- */
3569
- interface InterruptionEvents {
3570
- [key: string]: unknown;
3571
- 'speech.detected': {
3572
- rms: number;
3573
- };
3574
- 'speech.ended': {
3575
- durationMs: number;
3576
- };
3577
- 'interruption.triggered': {
3578
- rms: number;
3579
- durationMs: number;
3580
- };
3581
- }
3582
- /**
3583
- * Interruption handler configuration
3584
- *
3585
- * Industry standards applied:
3586
- * - vadThreshold: 0.5 (Silero VAD default)
3587
- * - minSpeechDurationMs: 200ms (Google/Amazon barge-in standard)
3588
- * - silenceTimeoutMs: 500ms (OpenAI Realtime API standard)
3589
- */
3590
- interface InterruptionConfig {
3591
- /** VAD probability threshold for speech detection (default: 0.5, Silero standard) */
3592
- vadThreshold?: number;
3593
- /** Minimum speech duration to trigger interruption (default: 200ms, Google/Amazon standard) */
3594
- minSpeechDurationMs?: number;
3595
- /** Silence duration to end speech (default: 500ms, OpenAI standard) */
3596
- silenceTimeoutMs?: number;
3597
- /** Enable interruption detection (default: true) */
3598
- enabled?: boolean;
3599
- }
3600
- /**
3601
- * Interruption Handler
3602
- */
3603
- declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
3604
- private config;
3605
- private isSpeaking;
3606
- private speechStartTime;
3607
- private lastSpeechTime;
3608
- private silenceTimer;
3609
- private aiIsSpeaking;
3610
- private interruptionTriggeredThisSession;
3611
- constructor(config?: InterruptionConfig);
3612
- /**
3613
- * Process VAD result for interruption detection
3614
- * @param vadProbability - Speech probability from VAD (0-1)
3615
- * @param audioEnergy - Optional RMS energy for logging (default: 0)
3616
- */
3617
- processVADResult(vadProbability: number, audioEnergy?: number): void;
3618
- /**
3619
- * Notify that AI started speaking
3620
- */
3621
- setAISpeaking(speaking: boolean): void;
3622
- /**
3623
- * Enable/disable interruption detection
3624
- */
3625
- setEnabled(enabled: boolean): void;
3626
- /**
3627
- * Update configuration
3628
- */
3629
- updateConfig(config: Partial<InterruptionConfig>): void;
3630
- /**
3631
- * Reset state
3632
- */
3633
- reset(): void;
3634
- /**
3635
- * Get current state
3636
- */
3637
- getState(): {
3638
- isSpeaking: boolean;
3639
- speechDurationMs: number;
3640
- };
3641
- private onSpeechDetected;
3642
- private onSilenceDetected;
3643
- }
3644
-
3645
2896
  /**
3646
2897
  * Model Cache
3647
2898
  *
@@ -4813,4 +4064,4 @@ declare class ProceduralLifeLayer {
4813
4064
  private updateBrowNoise;
4814
4065
  }
4815
4066
 
4816
- export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type 
SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SessionConfig, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
4067
+ export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ARKIT_BLENDSHAPES, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type 
SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };