@omote/core 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,5 +1,5 @@
1
- import { EventEmitter, OmoteEvents, AISessionState, AnimationEvent } from './events/index.mjs';
2
- export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
1
+ import { EventEmitter, OmoteEvents } from './events/index.mjs';
2
+ export { AnimationEvent, BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
3
3
  export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.mjs';
4
4
  export { ARKitToFLAMEMapping, ApiError, AudioChunkEvent, AvatarFormat, Character, CharacterAvatar, CharacterMemory, CharacterPersonality, CharacterSpec, CharacterVoice, CreateCharacterRequest, CreateCharacterResponse, CreateLAMJobRequest, CreateLAMJobResponse, CreateSessionRequest, CreateSessionResponse, GSplatConfig, LAMJob, LAMJobStatus, PROTOCOL_VERSION, PaginatedResponse, PlatformSession, ErrorEvent as ProtocolErrorEvent, ProtocolEvent, ResponseChunkEvent, ResponseEndEvent, ResponseStartEvent, SessionMessage, SessionStatus, isProtocolEvent } from '@omote/types';
5
5
 
@@ -584,6 +584,14 @@ interface FullFacePipelineOptions {
584
584
  chunkSize?: number;
585
585
  /** A2E inference engine */
586
586
  lam: A2EBackend;
587
+ /**
588
+ * Identity/style index for the A2E model (default: 0).
589
+ *
590
+ * The LAM model uses a 12-class one-hot identity vector as style conditioning.
591
+ * Different indices produce different expression intensity across face regions.
592
+ * Only affects Wav2Vec2Inference (GPU). Wav2ArkitCpuInference has identity 11 baked in.
593
+ */
594
+ identityIndex?: number;
587
595
  /** Per-character expression weight scaling */
588
596
  profile?: ExpressionProfile;
589
597
  /**
@@ -719,6 +727,78 @@ declare class FullFacePipeline extends EventEmitter<FullFacePipelineEvents> {
719
727
  dispose(): void;
720
728
  }
721
729
 
730
+ /**
731
+ * Interruption Handler
732
+ *
733
+ * VAD-based barge-in detection for AI conversations:
734
+ * - Monitors VAD probability for user speech
735
+ * - Detects when user interrupts AI response
736
+ * - Triggers interruption callbacks
737
+ */
738
+
739
+ interface InterruptionEvents {
740
+ [key: string]: unknown;
741
+ 'speech.detected': {
742
+ rms: number;
743
+ };
744
+ 'speech.ended': {
745
+ durationMs: number;
746
+ };
747
+ 'interruption.triggered': {
748
+ rms: number;
749
+ durationMs: number;
750
+ };
751
+ }
752
+ /**
753
+ * Interruption handler configuration
754
+ *
755
+ * Industry standards applied:
756
+ * - vadThreshold: 0.5 (Silero VAD default)
757
+ * - minSpeechDurationMs: 200ms (Google/Amazon barge-in standard)
758
+ * - silenceTimeoutMs: 500ms (OpenAI Realtime API standard)
759
+ */
760
+ interface InterruptionConfig {
761
+ /** VAD probability threshold for speech detection (default: 0.5, Silero standard) */
762
+ vadThreshold?: number;
763
+ /** Minimum speech duration to trigger interruption (default: 200ms, Google/Amazon standard) */
764
+ minSpeechDurationMs?: number;
765
+ /** Silence duration to end speech (default: 500ms, OpenAI standard) */
766
+ silenceTimeoutMs?: number;
767
+ /** Enable interruption detection (default: true) */
768
+ enabled?: boolean;
769
+ }
770
+ declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
771
+ private config;
772
+ private isSpeaking;
773
+ private speechStartTime;
774
+ private lastSpeechTime;
775
+ private silenceTimer;
776
+ private aiIsSpeaking;
777
+ private interruptionTriggeredThisSession;
778
+ constructor(config?: InterruptionConfig);
779
+ /**
780
+ * Process VAD result for interruption detection
781
+ * @param vadProbability - Speech probability from VAD (0-1)
782
+ * @param audioEnergy - Optional RMS energy for logging (default: 0)
783
+ */
784
+ processVADResult(vadProbability: number, audioEnergy?: number): void;
785
+ /** Notify that AI started/stopped speaking */
786
+ setAISpeaking(speaking: boolean): void;
787
+ /** Enable/disable interruption detection */
788
+ setEnabled(enabled: boolean): void;
789
+ /** Update configuration */
790
+ updateConfig(config: Partial<InterruptionConfig>): void;
791
+ /** Reset state */
792
+ reset(): void;
793
+ /** Get current state */
794
+ getState(): {
795
+ isSpeaking: boolean;
796
+ speechDurationMs: number;
797
+ };
798
+ private onSpeechDetected;
799
+ private onSilenceDetected;
800
+ }
801
+
722
802
  /**
723
803
  * Lazy ONNX Runtime loader with conditional WebGPU/WASM bundle loading
724
804
  *
@@ -2187,6 +2267,17 @@ interface A2EProcessorConfig {
2187
2267
  sampleRate?: number;
2188
2268
  /** Samples per inference chunk (default: 16000 = 1s) */
2189
2269
  chunkSize?: number;
2270
+ /**
2271
+ * Identity/style index for the A2E model (default: 0).
2272
+ *
2273
+ * The LAM model uses a one-hot identity vector (12 classes, indices 0-11) as
2274
+ * style conditioning alongside audio features. Different indices produce
2275
+ * different expression intensity across face regions (brows, eyes, cheeks).
2276
+ *
2277
+ * Only affects Wav2Vec2Inference (GPU model). Wav2ArkitCpuInference has
2278
+ * identity 11 baked into the model weights.
2279
+ */
2280
+ identityIndex?: number;
2190
2281
  /** Callback fired with each blendshape frame (push mode) */
2191
2282
  onFrame?: (frame: Float32Array) => void;
2192
2283
  /** Error callback */
@@ -2196,6 +2287,7 @@ declare class A2EProcessor {
2196
2287
  private readonly backend;
2197
2288
  private readonly sampleRate;
2198
2289
  private readonly chunkSize;
2290
+ private readonly identityIndex;
2199
2291
  private readonly onFrame?;
2200
2292
  private readonly onError?;
2201
2293
  private bufferCapacity;
@@ -2821,813 +2913,6 @@ declare class EmotionController {
2821
2913
  reset(): void;
2822
2914
  }
2823
2915
 
2824
- /**
2825
- * AI Adapter Interface
2826
- *
2827
- * Common interface for AI backends (AWS AgentCore, OpenAI Realtime).
2828
- * Adapters handle the conversation flow and emit events for animation.
2829
- *
2830
- * @category AI
2831
- */
2832
-
2833
- /**
2834
- * Tenant configuration for multi-tenant isolation
2835
- */
2836
- interface TenantConfig {
2837
- /** Unique tenant identifier */
2838
- tenantId: string;
2839
- /** Customer-specific API credentials */
2840
- credentials: {
2841
- apiKey?: string;
2842
- authToken?: string;
2843
- refreshToken?: string;
2844
- };
2845
- /** Character configuration for this tenant */
2846
- characterId: string;
2847
- /** Optional custom endpoint override */
2848
- endpoint?: string;
2849
- }
2850
- /**
2851
- * Voice configuration for TTS
2852
- */
2853
- interface VoiceConfig {
2854
- /** TTS provider */
2855
- provider: 'elevenlabs' | 'openai';
2856
- /** Voice ID */
2857
- voiceId: string;
2858
- /** Stability (0-1, ElevenLabs) */
2859
- stability?: number;
2860
- /** Similarity boost (0-1, ElevenLabs) */
2861
- similarityBoost?: number;
2862
- }
2863
- /**
2864
- * Session configuration
2865
- */
2866
- interface SessionConfig {
2867
- /** Session ID (generated or provided) */
2868
- sessionId: string;
2869
- /** Tenant this session belongs to */
2870
- tenant: TenantConfig;
2871
- /** Initial system prompt / personality */
2872
- systemPrompt?: string;
2873
- /** Voice configuration for TTS */
2874
- voice?: VoiceConfig;
2875
- /** Initial emotion state */
2876
- emotion?: string;
2877
- /** Language code */
2878
- language?: string;
2879
- }
2880
- /**
2881
- * Message role in conversation
2882
- */
2883
- type MessageRole = 'user' | 'assistant' | 'system';
2884
- /**
2885
- * Conversation message in session history
2886
- */
2887
- interface ConversationMessage {
2888
- /** Message role */
2889
- role: MessageRole;
2890
- /** Text content */
2891
- content: string;
2892
- /** Timestamp (ms) */
2893
- timestamp: number;
2894
- /** Emotion detected/expressed */
2895
- emotion?: string;
2896
- /** Audio duration if applicable (ms) */
2897
- audioDurationMs?: number;
2898
- }
2899
-
2900
- /**
2901
- * Events emitted by AI adapters
2902
- */
2903
- interface AIAdapterEvents {
2904
- [key: string]: unknown;
2905
- 'state.change': {
2906
- state: AISessionState;
2907
- previousState: AISessionState;
2908
- };
2909
- 'user.speech.start': {
2910
- timestamp: number;
2911
- };
2912
- 'user.speech.end': {
2913
- timestamp: number;
2914
- durationMs: number;
2915
- };
2916
- 'user.transcript.partial': {
2917
- text: string;
2918
- confidence: number;
2919
- };
2920
- 'user.transcript.final': {
2921
- text: string;
2922
- confidence: number;
2923
- };
2924
- 'ai.thinking.start': {
2925
- timestamp: number;
2926
- };
2927
- 'ai.response.start': {
2928
- text?: string;
2929
- emotion?: string;
2930
- };
2931
- 'ai.response.chunk': {
2932
- text: string;
2933
- isLast: boolean;
2934
- };
2935
- 'ai.response.end': {
2936
- fullText: string;
2937
- durationMs: number;
2938
- };
2939
- 'audio.output.chunk': {
2940
- audio: ArrayBuffer;
2941
- sampleRate: number;
2942
- timestamp: number;
2943
- };
2944
- 'audio.output.end': {
2945
- durationMs: number;
2946
- };
2947
- 'animation': AnimationEvent;
2948
- 'memory.updated': {
2949
- messageCount: number;
2950
- tokenCount?: number;
2951
- };
2952
- 'connection.opened': {
2953
- sessionId: string;
2954
- adapter: string;
2955
- };
2956
- 'connection.closed': {
2957
- reason: string;
2958
- };
2959
- 'connection.error': {
2960
- error: Error;
2961
- recoverable: boolean;
2962
- };
2963
- 'interruption.detected': {
2964
- timestamp: number;
2965
- };
2966
- 'interruption.handled': {
2967
- action: 'stop' | 'continue';
2968
- timestamp: number;
2969
- };
2970
- }
2971
- /**
2972
- * Base interface for all AI adapters
2973
- */
2974
- interface AIAdapter {
2975
- /** Adapter name for logging/debugging */
2976
- readonly name: string;
2977
- /** Current session state */
2978
- readonly state: AISessionState;
2979
- /** Current session ID (null if not connected) */
2980
- readonly sessionId: string | null;
2981
- /** Whether the adapter is connected */
2982
- readonly isConnected: boolean;
2983
- /**
2984
- * Initialize and connect the adapter
2985
- */
2986
- connect(config: SessionConfig): Promise<void>;
2987
- /**
2988
- * Disconnect and cleanup
2989
- */
2990
- disconnect(): Promise<void>;
2991
- /**
2992
- * Push user audio for processing
2993
- * @param audio - PCM audio data (16kHz, mono)
2994
- */
2995
- pushAudio(audio: Int16Array | Float32Array): void;
2996
- /**
2997
- * Send text message directly (bypasses STT)
2998
- */
2999
- sendText(text: string): Promise<void>;
3000
- /**
3001
- * Handle user interruption
3002
- * Stops current AI speech and prepares for new input
3003
- */
3004
- interrupt(): void;
3005
- /**
3006
- * Get conversation history
3007
- */
3008
- getHistory(): ConversationMessage[];
3009
- /**
3010
- * Clear conversation history
3011
- */
3012
- clearHistory(): void;
3013
- /**
3014
- * Check if adapter is available/healthy
3015
- */
3016
- healthCheck(): Promise<boolean>;
3017
- on<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): () => void;
3018
- off<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): void;
3019
- once<K extends keyof AIAdapterEvents>(event: K, callback: (data: AIAdapterEvents[K]) => void): () => void;
3020
- }
3021
-
3022
- /**
3023
- * Conversation Session Interface
3024
- *
3025
- * Represents an active conversation with memory and state.
3026
- *
3027
- * @category AI
3028
- */
3029
-
3030
- /**
3031
- * Serializable session snapshot for persistence
3032
- */
3033
- interface SessionSnapshot {
3034
- /** Session ID */
3035
- sessionId: string;
3036
- /** Tenant ID */
3037
- tenantId: string;
3038
- /** Character ID */
3039
- characterId: string;
3040
- /** Conversation history */
3041
- history: ConversationMessage[];
3042
- /** Custom context */
3043
- context: Record<string, string>;
3044
- /** Emotion state */
3045
- emotion: EmotionWeights;
3046
- /** Creation timestamp */
3047
- createdAt: number;
3048
- /** Last activity timestamp */
3049
- lastActivityAt: number;
3050
- }
3051
- /**
3052
- * Extended session with memory management
3053
- */
3054
- interface ConversationSession {
3055
- /** Session identifier */
3056
- readonly sessionId: string;
3057
- /** Associated AI adapter */
3058
- readonly adapter: AIAdapter;
3059
- /** Session configuration */
3060
- readonly config: SessionConfig;
3061
- /** Current state */
3062
- readonly state: AISessionState;
3063
- /** Conversation history */
3064
- readonly history: ConversationMessage[];
3065
- /** Current emotion state */
3066
- readonly emotion: EmotionWeights;
3067
- /** Session creation timestamp */
3068
- readonly createdAt: number;
3069
- /** Last activity timestamp */
3070
- readonly lastActivityAt: number;
3071
- /**
3072
- * Start the session (connects adapter)
3073
- */
3074
- start(): Promise<void>;
3075
- /**
3076
- * End the session (disconnects adapter)
3077
- */
3078
- end(): Promise<void>;
3079
- /**
3080
- * Push audio input
3081
- */
3082
- pushAudio(audio: Int16Array | Float32Array): void;
3083
- /**
3084
- * Send text input directly
3085
- */
3086
- sendText(text: string): Promise<void>;
3087
- /**
3088
- * Interrupt current AI response
3089
- */
3090
- interrupt(): void;
3091
- /**
3092
- * Update emotion state
3093
- */
3094
- setEmotion(emotion: EmotionWeights): void;
3095
- /**
3096
- * Add a context item (custom memory)
3097
- */
3098
- addContext(key: string, value: string): void;
3099
- /**
3100
- * Remove a context item
3101
- */
3102
- removeContext(key: string): void;
3103
- /**
3104
- * Get all context items
3105
- */
3106
- getContext(): Record<string, string>;
3107
- /**
3108
- * Export session for persistence
3109
- */
3110
- export(): SessionSnapshot;
3111
- /**
3112
- * Import session from snapshot
3113
- */
3114
- import(snapshot: SessionSnapshot): void;
3115
- }
3116
-
3117
- /**
3118
- * AWS AgentCore Adapter
3119
- *
3120
- * Primary AI adapter for the Omote Platform.
3121
- *
3122
- * Pipeline:
3123
- * User Audio -> Whisper ASR (local) -> Text
3124
- * Text -> AgentCore (WebSocket) -> Response Text + Audio chunks (TTS handled backend-side)
3125
- * Audio chunks -> LAM (local) -> Blendshapes -> Render
3126
- *
3127
- * @category AI
3128
- */
3129
-
3130
- /**
3131
- * AgentCore-specific configuration
3132
- */
3133
- interface AgentCoreConfig {
3134
- /** AgentCore WebSocket endpoint */
3135
- endpoint: string;
3136
- /** AWS region */
3137
- region?: string;
3138
- /** Model URLs */
3139
- models?: {
3140
- lamUrl?: string;
3141
- };
3142
- /** Enable observability */
3143
- observability?: {
3144
- tracing?: boolean;
3145
- metrics?: boolean;
3146
- };
3147
- }
3148
- /**
3149
- * AWS AgentCore Adapter
3150
- */
3151
- declare class AgentCoreAdapter extends EventEmitter<AIAdapterEvents> implements AIAdapter {
3152
- readonly name = "AgentCore";
3153
- private _state;
3154
- private _sessionId;
3155
- private _isConnected;
3156
- private asr;
3157
- private vad;
3158
- private lam;
3159
- private emotionController;
3160
- private pipeline;
3161
- private ws;
3162
- private wsReconnectAttempts;
3163
- private readonly maxReconnectAttempts;
3164
- private audioBuffer;
3165
- private history;
3166
- private currentConfig;
3167
- private agentCoreConfig;
3168
- private isSpeaking;
3169
- private currentTtsAbortController;
3170
- private tokenCache;
3171
- constructor(config: AgentCoreConfig);
3172
- get state(): AISessionState;
3173
- get sessionId(): string | null;
3174
- get isConnected(): boolean;
3175
- /**
3176
- * Connect to AgentCore with session configuration
3177
- */
3178
- connect(config: SessionConfig): Promise<void>;
3179
- /**
3180
- * Disconnect and cleanup
3181
- */
3182
- disconnect(): Promise<void>;
3183
- /**
3184
- * Push user audio for processing
3185
- */
3186
- pushAudio(audio: Int16Array | Float32Array): void;
3187
- /**
3188
- * Send text directly to AgentCore
3189
- */
3190
- sendText(text: string): Promise<void>;
3191
- /**
3192
- * Interrupt current AI response
3193
- */
3194
- interrupt(): void;
3195
- getHistory(): ConversationMessage[];
3196
- clearHistory(): void;
3197
- healthCheck(): Promise<boolean>;
3198
- private setState;
3199
- private getAuthToken;
3200
- private initASR;
3201
- private initLAM;
3202
- private initPipeline;
3203
- private connectWebSocket;
3204
- private handleAgentCoreMessage;
3205
- private scheduleTranscription;
3206
- /**
3207
- * Detect voice activity using Silero VAD
3208
- * Falls back to simple RMS if VAD not available
3209
- */
3210
- private detectVoiceActivity;
3211
- private base64ToArrayBuffer;
3212
- private addToHistory;
3213
- private handleDisconnect;
3214
- }
3215
-
3216
- /**
3217
- * Conversation Orchestrator
3218
- *
3219
- * Manages the conversation pipeline with AgentCore:
3220
- * - Handles session lifecycle and tenant isolation
3221
- * - Manages adapter events and state
3222
- *
3223
- * @category AI
3224
- */
3225
-
3226
- /**
3227
- * Orchestrator configuration
3228
- */
3229
- interface OrchestratorConfig {
3230
- /** AgentCore adapter config */
3231
- adapter: AgentCoreConfig;
3232
- /** Connection timeout in ms */
3233
- connectionTimeoutMs?: number;
3234
- /** Max retry attempts */
3235
- maxRetries?: number;
3236
- }
3237
- /**
3238
- * Orchestrator events (extends AI adapter events)
3239
- */
3240
- interface OrchestratorEvents extends AIAdapterEvents {
3241
- 'session.created': {
3242
- sessionId: string;
3243
- tenantId: string;
3244
- };
3245
- 'session.ended': {
3246
- sessionId: string;
3247
- reason: string;
3248
- };
3249
- }
3250
- /**
3251
- * Conversation Orchestrator
3252
- */
3253
- declare class ConversationOrchestrator extends EventEmitter<OrchestratorEvents> {
3254
- private config;
3255
- private adapter;
3256
- private sessions;
3257
- private tenants;
3258
- private healthCheckInterval;
3259
- private readonly HEALTH_CHECK_INTERVAL_MS;
3260
- constructor(config: OrchestratorConfig);
3261
- /**
3262
- * Register a tenant
3263
- */
3264
- registerTenant(tenant: TenantConfig): void;
3265
- /**
3266
- * Unregister a tenant
3267
- */
3268
- unregisterTenant(tenantId: string): void;
3269
- /**
3270
- * Get tenant config
3271
- */
3272
- getTenant(tenantId: string): TenantConfig | undefined;
3273
- /**
3274
- * Create a new conversation session for a tenant
3275
- */
3276
- createSession(tenantId: string, options?: Partial<SessionConfig>): Promise<ConversationSession>;
3277
- /**
3278
- * End a session
3279
- */
3280
- endSession(sessionId: string): Promise<void>;
3281
- /**
3282
- * Get session by ID
3283
- */
3284
- getSession(sessionId: string): ConversationSession | undefined;
3285
- /**
3286
- * Get all sessions for a tenant
3287
- */
3288
- getTenantSessions(tenantId: string): ConversationSession[];
3289
- /**
3290
- * Start health monitoring
3291
- */
3292
- startHealthMonitoring(): void;
3293
- /**
3294
- * Stop health monitoring
3295
- */
3296
- stopHealthMonitoring(): void;
3297
- /**
3298
- * Dispose all resources
3299
- */
3300
- dispose(): Promise<void>;
3301
- private generateSessionId;
3302
- private forwardAdapterEvents;
3303
- private performHealthCheck;
3304
- }
3305
-
3306
- /**
3307
- * Tenant Manager
3308
- *
3309
- * Handles multi-tenant isolation for the Omote Platform:
3310
- * - Credential isolation per tenant
3311
- * - Session scoping per tenant
3312
- * - Quota management
3313
- * - Token refresh
3314
- *
3315
- * @category AI
3316
- */
3317
-
3318
- /**
3319
- * Tenant quota configuration
3320
- */
3321
- interface TenantQuota {
3322
- /** Max concurrent sessions */
3323
- maxSessions: number;
3324
- /** Requests per minute */
3325
- requestsPerMinute: number;
3326
- /** Max tokens per conversation */
3327
- maxTokensPerConversation: number;
3328
- /** Max audio minutes per day */
3329
- maxAudioMinutesPerDay: number;
3330
- }
3331
- /**
3332
- * Tenant usage tracking
3333
- */
3334
- interface TenantUsage {
3335
- /** Current active sessions */
3336
- currentSessions: number;
3337
- /** Requests in current minute */
3338
- requestsThisMinute: number;
3339
- /** Total tokens used */
3340
- tokensUsed: number;
3341
- /** Audio minutes used today */
3342
- audioMinutesToday: number;
3343
- /** Last reset timestamp */
3344
- lastMinuteReset: number;
3345
- /** Last daily reset timestamp */
3346
- lastDailyReset: number;
3347
- }
3348
- /**
3349
- * Token refresh callback
3350
- */
3351
- type TokenRefreshCallback = () => Promise<string>;
3352
- /**
3353
- * Tenant Manager
3354
- */
3355
- declare class TenantManager {
3356
- private tenants;
3357
- private quotas;
3358
- private usage;
3359
- private tokenRefreshCallbacks;
3360
- /**
3361
- * Default quota for new tenants
3362
- */
3363
- static readonly DEFAULT_QUOTA: TenantQuota;
3364
- /**
3365
- * Register a tenant with quota
3366
- */
3367
- register(tenant: TenantConfig, quota?: TenantQuota, tokenRefreshCallback?: TokenRefreshCallback): void;
3368
- /**
3369
- * Unregister a tenant
3370
- */
3371
- unregister(tenantId: string): void;
3372
- /**
3373
- * Get tenant config
3374
- */
3375
- get(tenantId: string): TenantConfig | undefined;
3376
- /**
3377
- * Check if tenant exists
3378
- */
3379
- has(tenantId: string): boolean;
3380
- /**
3381
- * Get all tenant IDs
3382
- */
3383
- getTenantIds(): string[];
3384
- /**
3385
- * Check if tenant can create new session
3386
- */
3387
- canCreateSession(tenantId: string): boolean;
3388
- /**
3389
- * Check if tenant can make request
3390
- */
3391
- canMakeRequest(tenantId: string): boolean;
3392
- /**
3393
- * Check if tenant can use audio
3394
- */
3395
- canUseAudio(tenantId: string, minutes: number): boolean;
3396
- /**
3397
- * Increment session count
3398
- */
3399
- incrementSessions(tenantId: string): void;
3400
- /**
3401
- * Decrement session count
3402
- */
3403
- decrementSessions(tenantId: string): void;
3404
- /**
3405
- * Record a request
3406
- */
3407
- recordRequest(tenantId: string): void;
3408
- /**
3409
- * Record token usage
3410
- */
3411
- recordTokens(tenantId: string, tokens: number): void;
3412
- /**
3413
- * Record audio usage
3414
- */
3415
- recordAudioMinutes(tenantId: string, minutes: number): void;
3416
- /**
3417
- * Get fresh auth token for tenant
3418
- */
3419
- getAuthToken(tenantId: string): Promise<string>;
3420
- /**
3421
- * Update tenant credentials
3422
- */
3423
- updateCredentials(tenantId: string, credentials: Partial<TenantConfig['credentials']>): void;
3424
- /**
3425
- * Get usage stats for tenant
3426
- */
3427
- getUsage(tenantId: string): TenantUsage | undefined;
3428
- /**
3429
- * Get quota for tenant
3430
- */
3431
- getQuota(tenantId: string): TenantQuota | undefined;
3432
- /**
3433
- * Update quota for tenant
3434
- */
3435
- updateQuota(tenantId: string, quota: Partial<TenantQuota>): void;
3436
- /**
3437
- * Reset all usage stats for a tenant
3438
- */
3439
- resetUsage(tenantId: string): void;
3440
- private checkMinuteReset;
3441
- private checkDailyReset;
3442
- }
3443
-
3444
- /**
3445
- * Audio Sync Manager
3446
- *
3447
- * Synchronizes TTS audio playback with lip sync animation:
3448
- * - Buffers audio for inference
3449
- * - Manages playback timing
3450
- * - Handles audio queue for streaming
3451
- *
3452
- * @category AI
3453
- */
3454
-
3455
- /**
3456
- * Audio sync events
3457
- */
3458
- interface AudioSyncEvents {
3459
- [key: string]: unknown;
3460
- 'buffer.ready': {
3461
- audio: Float32Array;
3462
- };
3463
- 'playback.start': Record<string, never>;
3464
- 'playback.end': Record<string, never>;
3465
- 'sync.drift': {
3466
- driftMs: number;
3467
- };
3468
- }
3469
- /**
3470
- * Audio sync configuration
3471
- */
3472
- interface AudioSyncConfig {
3473
- /** Target sample rate (default: 16000) */
3474
- sampleRate?: number;
3475
- /** Buffer size for inference (default: 16640) */
3476
- bufferSize?: number;
3477
- /** Overlap between buffers (default: 4160) */
3478
- overlapSize?: number;
3479
- /** Max drift before correction (default: 100ms) */
3480
- maxDriftMs?: number;
3481
- }
3482
- /**
3483
- * Audio Sync Manager
3484
- */
3485
- declare class AudioSyncManager extends EventEmitter<AudioSyncEvents> {
3486
- private config;
3487
- private audioBuffer;
3488
- private bufferPosition;
3489
- private playbackQueue;
3490
- private isPlaying;
3491
- private audioContext;
3492
- private playbackStartTime;
3493
- private samplesPlayed;
3494
- constructor(config?: AudioSyncConfig);
3495
- /**
3496
- * Initialize audio context
3497
- */
3498
- initialize(): Promise<void>;
3499
- /**
3500
- * Push audio chunk for processing and playback
3501
- */
3502
- pushAudio(audio: Float32Array): void;
3503
- /**
3504
- * Buffer audio for inference
3505
- */
3506
- private bufferForInference;
3507
- /**
3508
- * Start audio playback
3509
- */
3510
- private startPlayback;
3511
- /**
3512
- * Process playback queue
3513
- */
3514
- private processPlaybackQueue;
3515
- /**
3516
- * Check for audio/animation drift
3517
- */
3518
- private checkDrift;
3519
- /**
3520
- * Clear playback queue
3521
- */
3522
- clearQueue(): void;
3523
- /**
3524
- * Stop playback
3525
- */
3526
- stop(): void;
3527
- /**
3528
- * Get current playback position in seconds
3529
- */
3530
- getPlaybackPosition(): number;
3531
- /**
3532
- * Check if currently playing
3533
- */
3534
- getIsPlaying(): boolean;
3535
- /**
3536
- * Dispose resources
3537
- */
3538
- dispose(): void;
3539
- }
3540
-
3541
- /**
3542
- * Interruption Handler
3543
- *
3544
- * VAD-based interruption detection for AI conversations:
3545
- * - Monitors user audio for speech
3546
- * - Detects when user interrupts AI response
3547
- * - Triggers interruption callbacks
3548
- *
3549
- * @category AI
3550
- */
3551
-
3552
- /**
3553
- * Interruption events
3554
- */
3555
- interface InterruptionEvents {
3556
- [key: string]: unknown;
3557
- 'speech.detected': {
3558
- rms: number;
3559
- };
3560
- 'speech.ended': {
3561
- durationMs: number;
3562
- };
3563
- 'interruption.triggered': {
3564
- rms: number;
3565
- durationMs: number;
3566
- };
3567
- }
3568
- /**
3569
- * Interruption handler configuration
3570
- *
3571
- * Industry standards applied:
3572
- * - vadThreshold: 0.5 (Silero VAD default)
3573
- * - minSpeechDurationMs: 200ms (Google/Amazon barge-in standard)
3574
- * - silenceTimeoutMs: 500ms (OpenAI Realtime API standard)
3575
- */
3576
- interface InterruptionConfig {
3577
- /** VAD probability threshold for speech detection (default: 0.5, Silero standard) */
3578
- vadThreshold?: number;
3579
- /** Minimum speech duration to trigger interruption (default: 200ms, Google/Amazon standard) */
3580
- minSpeechDurationMs?: number;
3581
- /** Silence duration to end speech (default: 500ms, OpenAI standard) */
3582
- silenceTimeoutMs?: number;
3583
- /** Enable interruption detection (default: true) */
3584
- enabled?: boolean;
3585
- }
3586
- /**
3587
- * Interruption Handler
3588
- */
3589
- declare class InterruptionHandler extends EventEmitter<InterruptionEvents> {
3590
- private config;
3591
- private isSpeaking;
3592
- private speechStartTime;
3593
- private lastSpeechTime;
3594
- private silenceTimer;
3595
- private aiIsSpeaking;
3596
- private interruptionTriggeredThisSession;
3597
- constructor(config?: InterruptionConfig);
3598
- /**
3599
- * Process VAD result for interruption detection
3600
- * @param vadProbability - Speech probability from VAD (0-1)
3601
- * @param audioEnergy - Optional RMS energy for logging (default: 0)
3602
- */
3603
- processVADResult(vadProbability: number, audioEnergy?: number): void;
3604
- /**
3605
- * Notify that AI started speaking
3606
- */
3607
- setAISpeaking(speaking: boolean): void;
3608
- /**
3609
- * Enable/disable interruption detection
3610
- */
3611
- setEnabled(enabled: boolean): void;
3612
- /**
3613
- * Update configuration
3614
- */
3615
- updateConfig(config: Partial<InterruptionConfig>): void;
3616
- /**
3617
- * Reset state
3618
- */
3619
- reset(): void;
3620
- /**
3621
- * Get current state
3622
- */
3623
- getState(): {
3624
- isSpeaking: boolean;
3625
- speechDurationMs: number;
3626
- };
3627
- private onSpeechDetected;
3628
- private onSilenceDetected;
3629
- }
3630
-
3631
2916
  /**
3632
2917
  * Model Cache
3633
2918
  *
@@ -4799,4 +4084,4 @@ declare class ProceduralLifeLayer {
4799
4084
  private updateBrowNoise;
4800
4085
  }
4801
4086
 
4802
- export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, type AIAdapter, type AIAdapterEvents, AISessionState, ARKIT_BLENDSHAPES, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SessionConfig, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };
4087
+ export { type A2EBackend, type A2EModelInfo, A2EOrchestrator, type A2EOrchestratorConfig, A2EProcessor, type A2EProcessorConfig, type A2EProgressEvent, type A2EResult, ARKIT_BLENDSHAPES, type ActiveSpan, type AnimationClip, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, BLENDSHAPE_TO_GROUP, type BackendPreference, type BlendWeight, type BlendshapeGroup, BlendshapeSmoother, type BlendshapeSmootherConfig, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type CreateA2EConfig, type CreateSenseVoiceConfig, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type ExpressionProfile, type FetchWithCacheOptions, type FullFaceFrame, FullFacePipeline, type FullFacePipelineEvents, type FullFacePipelineOptions, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, LAM_BLENDSHAPES, type LifeLayerConfig, type LifeLayerInput, type LifeLayerOutput, MODEL_LOAD_TIME_BUCKETS, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, ProceduralLifeLayer, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SenseVoiceBackend, type SenseVoiceConfig, SenseVoiceInference, type SenseVoiceLanguage, type SenseVoiceModelInfo, type SenseVoiceResult, SenseVoiceUnifiedAdapter, SenseVoiceWorker, type SenseVoiceWorkerConfig, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADUnifiedAdapter, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type Transition, UnifiedInferenceWorker, type VADBackend, type VADModelInfo, type VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type Wav2ArkitCpuConfig, Wav2ArkitCpuInference, Wav2ArkitCpuUnifiedAdapter, Wav2ArkitCpuWorker, type Wav2ArkitCpuWorkerConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, blendEmotions, calculatePeak, calculateRMS, configureCacheLimit, configureTelemetry, createA2E, createEmotionVector, createSenseVoice, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getModelCache, getOptimalWasmThreads, getRecommendedBackend, getTelemetry, hasWebGPUApi, isAndroid, isIOS, isIOSSafari, isMobile, isSafari, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpBlendshapes, lerpEmotion, preloadModels, resolveBackend, shouldEnableWasmProxy, shouldUseCpuA2E, shouldUseNativeASR, shouldUseServerA2E, supportsVADWorker };