@omote/core 0.5.3 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.mjs';
2
- export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
1
+ import { EventEmitter, OmoteEvents } from './events/index.mjs';
2
+ export { AnimationEvent, BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
3
3
  export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.mjs';
4
4
  export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
5
5
 
@@ -719,6 +719,78 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
719
719
  dispose(): void;
720
720
  }
721
721
 
722
+ /**
723
+ * Interruption Handler
724
+ *
725
+ * VAD-based barge-in detection for AI conversations:
726
+ * - Monitors VAD probability for user speech
727
+ * - Detects when user interrupts AI response
728
+ * - Triggers interruption callbacks
729
+ */
730
+
731
+ interface InterruptionEvents {
732
+ [key: string]: unknown;
733
+ 'speech.detected': {
734
+ rms: number;
735
+ };
736
+ 'speech.ended': {
737
+ durationMs: number;
738
+ };
739
+ 'interruption.triggered': {
740
+ rms: number;
741
+ durationMs: number;
742
+ };
743
+ }
744
+ /**
745
+ * Interruption handler configuration
746
+ *
747
+ * Industry standards applied:
748
+ * - vadThreshold: 0.5 (Silero VAD default)
749
+ * - minSpeechDurationMs: 200ms (Google/Amazon barge-in standard)
750
+ * - silenceTimeoutMs: 500ms (OpenAI Realtime API standard)
751
+ */
752
+ interface InterruptionConfig {
753
+ /** VAD probability threshold for speech detection (default: 0.5, Silero standard) */
754
+ vadThreshold?: number;
755
+ /** Minimum speech duration to trigger interruption (default: 200ms, Google/Amazon standard) */
756
+ minSpeechDurationMs?: number;
757
+ /** Silence duration to end speech (default: 500ms, OpenAI standard) */
758
+ silenceTimeoutMs?: number;
759
+ /** Enable interruption detection (default: true) */
760
+ enabled?: boolean;
761
+ }
762
+ declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
763
+ private config;
764
+ private isSpeaking;
765
+ private speechStartTime;
766
+ private lastSpeechTime;
767
+ private silenceTimer;
768
+ private aiIsSpeaking;
769
+ private interruptionTriggeredThisSession;
770
+ constructor(config?: InterruptionConfig);
771
+ /**
772
+ * Process VAD result for interruption detection
773
+ * @param vadProbability - Speech probability from VAD (0-1)
774
+ * @param audioEnergy - Optional RMS energy for logging (default: 0)
775
+ */
776
+ processVADResult(vadProbability: number, audioEnergy?: number): void;
777
+ /** Notify that AI started/stopped speaking */
778
+ setAISpeaking(speaking: boolean): void;
779
+ /** Enable/disable interruption detection */
780
+ setEnabled(enabled: boolean): void;
781
+ /** Update configuration */
782
+ updateConfig(config: Partial<InterruptionConfig>): void;
783
+ /** Reset state */
784
+ reset(): void;
785
+ /** Get current state */
786
+ getState(): {
787
+ isSpeaking: boolean;
788
+ speechDurationMs: number;
789
+ };
790
+ private onSpeechDetected;
791
+ private onSilenceDetected;
792
+ }
793
+
722
794
  /**
723
795
  * Lazy ONNX Runtime loader with conditional WebGPU/WASM bundle loading
724
796
  *
@@ -2821,813 +2893,6 @@ declare class EmotionController {
2821
2893
  reset(): void;
2822
2894
  }
2823
2895
 
2824
- /**
2825
- * AI Adapter Interface
2826
- *
2827
- * Common interface for AI backends (AWS AgentCore, OpenAI Realtime).
2828
- * Adapters handle the conversation flow and emit events for animation.
2829
- *
2830
- * @category AI
2831
- */
2832
-
2833
- /**
2834
- * Tenant configuration for multi-tenant isolation
2835
- */
2836
- interface TenantConfig {
2837
- /** Unique tenant identifier */
2838
- tenantId: string;
2839
- /** Customer-specific API credentials */
2840
- credentials: {
2841
- apiKey?: string;
2842
- authToken?: string;
2843
- refreshToken?: string;
2844
- };
2845
- /** Character configuration for this tenant */
2846
- characterId: string;
2847
- /** Optional custom endpoint override */
2848
- endpoint?: string;
2849
- }
2850
- /**
2851
- * Voice configuration for TTS
2852
- */
2853
- interface VoiceConfig {
2854
- /** TTS provider */
2855
- provider: 'elevenlabs' | 'openai';
2856
- /** Voice ID */
2857
- voiceId: string;
2858
- /** Stability (0-1, ElevenLabs) */
2859
- stability?: number;
2860
- /** Similarity boost (0-1, ElevenLabs) */
2861
- similarityBoost?: number;
2862
- }
2863
- /**
2864
- * Session configuration
2865
- */
2866
- interface SessionConfig {
2867
- /** Session ID (generated or provided) */
2868
- sessionId: string;
2869
- /** Tenant this session belongs to */
2870
- tenant: TenantConfig;
2871
- /** Initial system prompt / personality */
2872
- systemPrompt?: string;
2873
- /** Voice configuration for TTS */
2874
- voice?: VoiceConfig;
2875
- /** Initial emotion state */
2876
- emotion?: string;
2877
- /** Language code */
2878
- language?: string;
2879
- }
2880
- /**
2881
- * Message role in conversation
2882
- */
2883
- type MessageRole = 'user' | 'assistant' | 'system';
2884
- /**
2885
- * Conversation message in session history
2886
- */
2887
- interface ConversationMessage {
2888
- /** Message role */
2889
- role: MessageRole;
2890
- /** Text content */
2891
- content: string;
2892
- /** Timestamp (ms) */
2893
- timestamp: number;
2894
- /** Emotion detected/expressed */
2895
- emotion?: string;
2896
- /** Audio duration if applicable (ms) */
2897
- audioDurationMs?: number;
2898
- }
2899
-
2900
- /**
2901
- * Events emitted by AI adapters
2902
- */
2903
- interface AIAdapterEvents {
2904
- [key: string]: unknown;
2905
- 'state.change': {
2906
- state: AISessionState;
2907
- previousState: AISessionState;
2908
- };
2909
- 'user.speech.start': {
2910
- timestamp: number;
2911
- };
2912
- 'user.speech.end': {
2913
- timestamp: number;
2914
- durationMs: number;
2915
- };
2916
- 'user.transcript.partial': {
2917
- text: string;
2918
- confidence: number;
2919
- };
2920
- 'user.transcript.final': {
2921
- text: string;
2922
- confidence: number;
2923
- };
2924
- 'ai.thinking.start': {
2925
- timestamp: number;
2926
- };
2927
- 'ai.response.start': {
2928
- text?: string;
2929
- emotion?: string;
2930
- };
2931
- 'ai.response.chunk': {
2932
- text: string;
2933
- isLast: boolean;
2934
- };
2935
- 'ai.response.end': {
2936
- fullText: string;
2937
- durationMs: number;
2938
- };
2939
- 'audio.output.chunk': {
2940
- audio: ArrayBuffer;
2941
- sampleRate: number;
2942
- timestamp: number;
2943
- };
2944
- 'audio.output.end': {
2945
- durationMs: number;
2946
- };
2947
- 'animation': AnimationEvent;
2948
- 'memory.updated': {
2949
- messageCount: number;
2950
- tokenCount?: number;
2951
- };
2952
- 'connection.opened': {
2953
- sessionId: string;
2954
- adapter: string;
2955
- };
2956
- 'connection.closed': {
2957
- reason: string;
2958
- };
2959
- 'connection.error': {
2960
- error: Error;
2961
- recoverable: boolean;
2962
- };
2963
- 'interruption.detected': {
2964
- timestamp: number;
2965
- };
2966
- 'interruption.handled': {
2967
- action: 'stop' | 'continue';
2968
- timestamp: number;
2969
- };
2970
- }
2971
- /**
2972
- * Base interface for all AI adapters
2973
- */
2974
- interface AIAdapter {
2975
- /** Adapter name for logging/debugging */
2976
- readonly name: string;
2977
- /** Current session state */
2978
- readonly state: AISessionState;
2979
- /** Current session ID (null if not connected) */
2980
- readonly sessionId: string | null;
2981
- /** Whether the adapter is connected */
2982
- readonly isConnected: boolean;
2983
- /**
2984
- * Initialize and connect the adapter
2985
- */
2986
- connect(config: SessionConfig): Promise<void>;
2987
- /**
2988
- * Disconnect and cleanup
2989
- */
2990
- disconnect(): Promise<void>;
2991
- /**
2992
- * Push user audio for processing
2993
- * @param audio - PCM audio data (16kHz, mono)
2994
- */
2995
- pushAudio(audio: Int16Array | Float32Array): void;
2996
- /**
2997
- * Send text message directly (bypasses STT)
2998
- */
2999
- sendText(text: string): Promise<void>;
3000
- /**
3001
- * Handle user interruption
3002
- * Stops current AI speech and prepares for new input
3003
- */
3004
- interrupt(): void;
3005
- /**
3006
- * Get conversation history
3007
- */
3008
- getHistory(): ConversationMessage[];
3009
- /**
3010
- * Clear conversation history
3011
- */
3012
- clearHistory(): void;
3013
- /**
3014
- * Check if adapter is available/healthy
3015
- */
3016
- healthCheck(): Promise<boolean>;
3017
- on<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): () => void;
3018
- off<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): void;
3019
- once<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): () => void;
3020
- }
3021
-
3022
- /**
3023
- * Conversation Session Interface
3024
- *
3025
- * Represents an active conversation with memory and state.
3026
- *
3027
- * @category AI
3028
- */
3029
-
3030
- /**
3031
- * Serializable session snapshot for persistence
3032
- */
3033
- interface SessionSnapshot {
3034
- /** Session ID */
3035
- sessionId: string;
3036
- /** Tenant ID */
3037
- tenantId: string;
3038
- /** Character ID */
3039
- characterId: string;
3040
- /** Conversation history */
3041
- history: ConversationMessage[];
3042
- /** Custom context */
3043
- context: Record<string, string>;
3044
- /** Emotion state */
3045
- emotion: EmotionWeights;
3046
- /** Creation timestamp */
3047
- createdAt: number;
3048
- /** Last activity timestamp */
3049
- lastActivityAt: number;
3050
- }
3051
- /**
3052
- * Extended session with memory management
3053
- */
3054
- interface ConversationSession {
3055
- /** Session identifier */
3056
- readonly sessionId: string;
3057
- /** Associated AI adapter */
3058
- readonly adapter: AIAdapter;
3059
- /** Session configuration */
3060
- readonly config: SessionConfig;
3061
- /** Current state */
3062
- readonly state: AISessionState;
3063
- /** Conversation history */
3064
- readonly history: ConversationMessage[];
3065
- /** Current emotion state */
3066
- readonly emotion: EmotionWeights;
3067
- /** Session creation timestamp */
3068
- readonly createdAt: number;
3069
- /** Last activity timestamp */
3070
- readonly lastActivityAt: number;
3071
- /**
3072
- * Start the session (connects adapter)
3073
- */
3074
- start(): Promise<void>;
3075
- /**
3076
- * End the session (disconnects adapter)
3077
- */
3078
- end(): Promise<void>;
3079
- /**
3080
- * Push audio input
3081
- */
3082
- pushAudio(audio: Int16Array | Float32Array): void;
3083
- /**
3084
- * Send text input directly
3085
- */
3086
- sendText(text: string): Promise<void>;
3087
- /**
3088
- * Interrupt current AI response
3089
- */
3090
- interrupt(): void;
3091
- /**
3092
- * Update emotion state
3093
- */
3094
- setEmotion(emotion: EmotionWeights): void;
3095
- /**
3096
- * Add a context item (custom memory)
3097
- */
3098
- addContext(key: string, value: string): void;
3099
- /**
3100
- * Remove a context item
3101
- */
3102
- removeContext(key: string): void;
3103
- /**
3104
- * Get all context items
3105
- */
3106
- getContext(): Record<string, string>;
3107
- /**
3108
- * Export session for persistence
3109
- */
3110
- export(): SessionSnapshot;
3111
- /**
3112
- * Import session from snapshot
3113
- */
3114
- import(snapshot: SessionSnapshot): void;
3115
- }
3116
-
3117
- /**
3118
- * AWS AgentCore Adapter
3119
- *
3120
- * Primary AI adapter for the Omote Platform.
3121
- *
3122
- * Pipeline:
3123
- * User Audio -> Whisper ASR (local) -> Text
3124
- * Text -> AgentCore (WebSocket) -> Response Text + Audio chunks (TTS handled backend-side)
3125
- * Audio chunks -> LAM (local) -> Blendshapes -> Render
3126
- *
3127
- * @category AI
3128
- */
3129
-
3130
- /**
3131
- * AgentCore-specific configuration
3132
- */
3133
- interface AgentCoreConfig {
3134
- /** AgentCore WebSocket endpoint */
3135
- endpoint: string;
3136
- /** AWS region */
3137
- region?: string;
3138
- /** Model URLs */
3139
- models?: {
3140
- lamUrl?: string;
3141
- };
3142
- /** Enable observability */
3143
- observability?: {
3144
- tracing?: boolean;
3145
- metrics?: boolean;
3146
- };
3147
- }
3148
- /**
3149
- * AWS AgentCore Adapter
3150
- */
3151
- declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements AIAdapter {
3152
- readonly name = "AgentCore";
3153
- private _state;
3154
- private _sessionId;
3155
- private _isConnected;
3156
- private asr;
3157
- private vad;
3158
- private lam;
3159
- private emotionController;
3160
- private pipeline;
3161
- private ws;
3162
- private wsReconnectAttempts;
3163
- private readonly maxReconnectAttempts;
3164
- private audioBuffer;
3165
- private history;
3166
- private currentConfig;
3167
- private agentCoreConfig;
3168
- private isSpeaking;
3169
- private currentTtsAbortController;
3170
- private tokenCache;
3171
- constructor(config: AgentCoreConfig);
3172
- get state(): AISessionState;
3173
- get sessionId(): string | null;
3174
- get isConnected(): boolean;
3175
- /**
3176
- * Connect to AgentCore with session configuration
3177
- */
3178
- connect(config: SessionConfig): Promise<void>;
3179
- /**
3180
- * Disconnect and cleanup
3181
- */
3182
- disconnect(): Promise<void>;
3183
- /**
3184
- * Push user audio for processing
3185
- */
3186
- pushAudio(audio: Int16Array | Float32Array): void;
3187
- /**
3188
- * Send text directly to AgentCore
3189
- */
3190
- sendText(text: string): Promise<void>;
3191
- /**
3192
- * Interrupt current AI response
3193
- */
3194
- interrupt(): void;
3195
- getHistory(): ConversationMessage[];
3196
- clearHistory(): void;
3197
- healthCheck(): Promise<boolean>;
3198
- private setState;
3199
- private getAuthToken;
3200
- private initASR;
3201
- private initLAM;
3202
- private initPipeline;
3203
- private connectWebSocket;
3204
- private handleAgentCoreMessage;
3205
- private scheduleTranscription;
3206
- /**
3207
- * Detect voice activity using Silero VAD
3208
- * Falls back to simple RMS if VAD not available
3209
- */
3210
- private detectVoiceActivity;
3211
- private base64ToArrayBuffer;
3212
- private addToHistory;
3213
- private handleDisconnect;
3214
- }
3215
-
3216
- /**
3217
- * Conversation Orchestrator
3218
- *
3219
- * Manages the conversation pipeline with AgentCore:
3220
- * - Handles session lifecycle and tenant isolation
3221
- * - Manages adapter events and state
3222
- *
3223
- * @category AI
3224
- */
3225
-
3226
- /**
3227
- * Orchestrator configuration
3228
- */
3229
- interface OrchestratorConfig {
3230
- /** AgentCore adapter config */
3231
- adapter: AgentCoreConfig;
3232
- /** Connection timeout in ms */
3233
- connectionTimeoutMs?: number;
3234
- /** Max retry attempts */
3235
- maxRetries?: number;
3236
- }
3237
- /**
3238
- * Orchestrator events (extends AI adapter events)
3239
- */
3240
- interface OrchestratorEvents extends AIAdapterEvents {
3241
- 'session.created': {
3242
- sessionId: string;
3243
- tenantId: string;
3244
- };
3245
- 'session.ended': {
3246
- sessionId: string;
3247
- reason: string;
3248
- };
3249
- }
3250
- /**
3251
- * Conversation Orchestrator
3252
- */
3253
- declare class ConversationOrchestrator extends EventEmitter<OrchestratorEvents> {
3254
- private config;
3255
- private adapter;
3256
- private sessions;
3257
- private tenants;
3258
- private healthCheckInterval;
3259
- private readonly HEALTH_CHECK_INTERVAL_MS;
3260
- constructor(config: OrchestratorConfig);
3261
- /**
3262
- * Register a tenant
3263
- */
3264
- registerTenant(tenant: TenantConfig): void;
3265
- /**
3266
- * Unregister a tenant
3267
- */
3268
- unregisterTenant(tenantId: string): void;
3269
- /**
3270
- * Get tenant config
3271
- */
3272
- getTenant(tenantId: string): TenantConfig | undefined;
3273
- /**
3274
- * Create a new conversation session for a tenant
3275
- */
3276
- createSession(tenantId: string, options?: Partial<SessionConfig>): Promise<ConversationSession>;
3277
- /**
3278
- * End a session
3279
- */
3280
- endSession(sessionId: string): Promise<void>;
3281
- /**
3282
- * Get session by ID
3283
- */
3284
- getSession(sessionId: string): ConversationSession | undefined;
3285
- /**
3286
- * Get all sessions for a tenant
3287
- */
3288
- getTenantSessions(tenantId: string): ConversationSession[];
3289
- /**
3290
- * Start health monitoring
3291
- */
3292
- startHealthMonitoring(): void;
3293
- /**
3294
- * Stop health monitoring
3295
- */
3296
- stopHealthMonitoring(): void;
3297
- /**
3298
- * Dispose all resources
3299
- */
3300
- dispose(): Promise<void>;
3301
- private generateSessionId;
3302
- private forwardAdapterEvents;
3303
- private performHealthCheck;
3304
- }
3305
-
3306
- /**
3307
- * Tenant Manager
3308
- *
3309
- * Handles multi-tenant isolation for the Omote Platform:
3310
- * - Credential isolation per tenant
3311
- * - Session scoping per tenant
3312
- * - Quota management
3313
- * - Token refresh
3314
- *
3315
- * @category AI
3316
- */
3317
-
3318
- /**
3319
- * Tenant quota configuration
3320
- */
3321
- interface TenantQuota {
3322
- /** Max concurrent sessions */
3323
- maxSessions: number;
3324
- /** Requests per minute */
3325
- requestsPerMinute: number;
3326
- /** Max tokens per conversation */
3327
- maxTokensPerConversation: number;
3328
- /** Max audio minutes per day */
3329
- maxAudioMinutesPerDay: number;
3330
- }
3331
- /**
3332
- * Tenant usage tracking
3333
- */
3334
- interface TenantUsage {
3335
- /** Current active sessions */
3336
- currentSessions: number;
3337
- /** Requests in current minute */
3338
- requestsThisMinute: number;
3339
- /** Total tokens used */
3340
- tokensUsed: number;
3341
- /** Audio minutes used today */
3342
- audioMinutesToday: number;
3343
- /** Last reset timestamp */
3344
- lastMinuteReset: number;
3345
- /** Last daily reset timestamp */
3346
- lastDailyReset: number;
3347
- }
3348
- /**
3349
- * Token refresh callback
3350
- */
3351
- type TokenRefreshCallback = () => Promise<string>;
3352
- /**
3353
- * Tenant Manager
3354
- */
3355
- declare class TenantManager {
3356
- private tenants;
3357
- private quotas;
3358
- private usage;
3359
- private tokenRefreshCallbacks;
3360
- /**
3361
- * Default quota for new tenants
3362
- */
3363
- static readonly DEFAULT_QUOTA: TenantQuota;
3364
- /**
3365
- * Register a tenant with quota
3366
- */
3367
- register(tenant: TenantConfig, quota?: TenantQuota, tokenRefreshCallback?: TokenRefreshCallback): void;
3368
- /**
3369
- * Unregister a tenant
3370
- */
3371
- unregister(tenantId: string): void;
3372
- /**
3373
- * Get tenant config
3374
- */
3375
- get(tenantId: string): TenantConfig | undefined;
3376
- /**
3377
- * Check if tenant exists
3378
- */
3379
- has(tenantId: string): boolean;
3380
- /**
3381
- * Get all tenant IDs
3382
- */
3383
- getTenantIds(): string[];
3384
- /**
3385
- * Check if tenant can create new session
3386
- */
3387
- canCreateSession(tenantId: string): boolean;
3388
- /**
3389
- * Check if tenant can make request
3390
- */
3391
- canMakeRequest(tenantId: string): boolean;
3392
- /**
3393
- * Check if tenant can use audio
3394
- */
3395
- canUseAudio(tenantId: string, minutes: number): boolean;
3396
- /**
3397
- * Increment session count
3398
- */
3399
- incrementSessions(tenantId: string): void;
3400
- /**
3401
- * Decrement session count
3402
- */
3403
- decrementSessions(tenantId: string): void;
3404
- /**
3405
- * Record a request
3406
- */
3407
- recordRequest(tenantId: string): void;
3408
- /**
3409
- * Record token usage
3410
- */
3411
- recordTokens(tenantId: string, tokens: number): void;
3412
- /**
3413
- * Record audio usage
3414
- */
3415
- recordAudioMinutes(tenantId: string, minutes: number): void;
3416
- /**
3417
- * Get fresh auth token for tenant
3418
- */
3419
- getAuthToken(tenantId: string): Promise<string>;
3420
- /**
3421
- * Update tenant credentials
3422
- */
3423
- updateCredentials(tenantId: string, credentials: Partial<TenantConfig['credentials']>): void;
3424
- /**
3425
- * Get usage stats for tenant
3426
- */
3427
- getUsage(tenantId: string): TenantUsage | undefined;
3428
- /**
3429
- * Get quota for tenant
3430
- */
3431
- getQuota(tenantId: string): TenantQuota | undefined;
3432
- /**
3433
- * Update quota for tenant
3434
- */
3435
- updateQuota(tenantId: string, quota: Partial<TenantQuota>): void;
3436
- /**
3437
- * Reset all usage stats for a tenant
3438
- */
3439
- resetUsage(tenantId: string): void;
3440
- private checkMinuteReset;
3441
- private checkDailyReset;
3442
- }
3443
-
3444
- /**
3445
- * Audio Sync Manager
3446
- *
3447
- * Synchronizes TTS audio playback with lip sync animation:
3448
- * - Buffers audio for inference
3449
- * - Manages playback timing
3450
- * - Handles audio queue for streaming
3451
- *
3452
- * @category AI
3453
- */
3454
-
3455
- /**
3456
- * Audio sync events
3457
- */
3458
- interface AudioSyncEvents {
3459
- [key: string]: unknown;
3460
- 'buffer.ready': {
3461
- audio: Float32Array;
3462
- };
3463
- 'playback.start': Record<string, never>;
3464
- 'playback.end': Record<string, never>;
3465
- 'sync.drift': {
3466
- driftMs: number;
3467
- };
3468
- }
3469
- /**
3470
- * Audio sync configuration
3471
- */
3472
- interface AudioSyncConfig {
3473
- /** Target sample rate (default: 16000) */
3474
- sampleRate?: number;
3475
- /** Buffer size for inference (default: 16640) */
3476
- bufferSize?: number;
3477
- /** Overlap between buffers (default: 4160) */
3478
- overlapSize?: number;
3479
- /** Max drift before correction (default: 100ms) */
3480
- maxDriftMs?: number;
3481
- }
3482
- /**
3483
- * Audio Sync Manager
3484
- */
3485
- declare class AudioSyncManager extends EventEmitter<AudioSyncEvents> {
3486
- private config;
3487
- private audioBuffer;
3488
- private bufferPosition;
3489
- private playbackQueue;
3490
- private isPlaying;
3491
- private audioContext;
3492
- private playbackStartTime;
3493
- private samplesPlayed;
3494
- constructor(config?: AudioSyncConfig);
3495
- /**
3496
- * Initialize audio context
3497
- */
3498
- initialize(): Promise<void>;
3499
- /**
3500
- * Push audio chunk for processing and playback
3501
- */
3502
- pushAudio(audio: Float32Array): void;
3503
- /**
3504
- * Buffer audio for inference
3505
- */
3506
- private bufferForInference;
3507
- /**
3508
- * Start audio playback
3509
- */
3510
- private startPlayback;
3511
- /**
3512
- * Process playback queue
3513
- */
3514
- private processPlaybackQueue;
3515
- /**
3516
- * Check for audio/animation drift
3517
- */
3518
- private checkDrift;
3519
- /**
3520
- * Clear playback queue
3521
- */
3522
- clearQueue(): void;
3523
- /**
3524
- * Stop playback
3525
- */
3526
- stop(): void;
3527
- /**
3528
- * Get current playback position in seconds
3529
- */
3530
- getPlaybackPosition(): number;
3531
- /**
3532
- * Check if currently playing
3533
- */
3534
- getIsPlaying(): boolean;
3535
- /**
3536
- * Dispose resources
3537
- */
3538
- dispose(): void;
3539
- }
3540
-
3541
- /**
3542
- * Interruption Handler
3543
- *
3544
- * VAD-based interruption detection for AI conversations:
3545
- * - Monitors user audio for speech
3546
- * - Detects when user interrupts AI response
3547
- * - Triggers interruption callbacks
3548
- *
3549
- * @category AI
3550
- */
3551
-
3552
- /**
3553
- * Interruption events
3554
- */
3555
- interface InterruptionEvents {
3556
- [key: string]: unknown;
3557
- 'speech.detected': {
3558
- rms: number;
3559
- };
3560
- 'speech.ended': {
3561
- durationMs: number;
3562
- };
3563
- 'interruption.triggered': {
3564
- rms: number;
3565
- durationMs: number;
3566
- };
3567
- }
3568
- /**
3569
- * Interruption handler configuration
3570
- *
3571
- * Industry standards applied:
3572
- * - vadThreshold: 0.5 (Silero VAD default)
3573
- * - minSpeechDurationMs: 200ms (Google/Amazon barge-in standard)
3574
- * - silenceTimeoutMs: 500ms (OpenAI Realtime API standard)
3575
- */
3576
- interface InterruptionConfig {
3577
- /** VAD probability threshold for speech detection (default: 0.5, Silero standard) */
3578
- vadThreshold?: number;
3579
- /** Minimum speech duration to trigger interruption (default: 200ms, Google/Amazon standard) */
3580
- minSpeechDurationMs?: number;
3581
- /** Silence duration to end speech (default: 500ms, OpenAI standard) */
3582
- silenceTimeoutMs?: number;
3583
- /** Enable interruption detection (default: true) */
3584
- enabled?: boolean;
3585
- }
3586
- /**
3587
- * Interruption Handler
3588
- */
3589
- declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
3590
- private config;
3591
- private isSpeaking;
3592
- private speechStartTime;
3593
- private lastSpeechTime;
3594
- private silenceTimer;
3595
- private aiIsSpeaking;
3596
- private interruptionTriggeredThisSession;
3597
- constructor(config?: InterruptionConfig);
3598
- /**
3599
- * Process VAD result for interruption detection
3600
- * @param vadProbability - Speech probability from VAD (0-1)
3601
- * @param audioEnergy - Optional RMS energy for logging (default: 0)
3602
- */
3603
- processVADResult(vadProbability: number, audioEnergy?: number): void;
3604
- /**
3605
- * Notify that AI started speaking
3606
- */
3607
- setAISpeaking(speaking: boolean): void;
3608
- /**
3609
- * Enable/disable interruption detection
3610
- */
3611
- setEnabled(enabled: boolean): void;
3612
- /**
3613
- * Update configuration
3614
- */
3615
- updateConfig(config: Partial<InterruptionConfig>): void;
3616
- /**
3617
- * Reset state
3618
- */
3619
- reset(): void;
3620
- /**
3621
- * Get current state
3622
- */
3623
- getState(): {
3624
- isSpeaking: boolean;
3625
- speechDurationMs: number;
3626
- };
3627
- private onSpeechDetected;
3628
- private onSilenceDetected;
3629
- }
3630
-
3631
2896
  /**
3632
2897
  * Model Cache
3633
2898
  *
@@ -4799,4 +4064,4 @@ declare class ProceduralLifeLayer {
4799
4064
  private updateBrowNoise;
4800
4065
  }
4801
4066
 
4802
- export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SessionConfig, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
4067
+ export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ARKIT_BLENDSHAPES, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };