@omote/core 0.1.0 → 0.1.2

This diff shows the changes between publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
package/dist/index.d.mts CHANGED
@@ -1,236 +1,7 @@
+ import { EventEmitter, OmoteEvents, AnimationEvent } from './events/index.mjs';
+ export { BackendEvent, EmotionEvent, GazeEvent, STTFinalEvent, STTPartialEvent, SessionStateEvent, TTSEndEvent, TTSMarkEvent, TTSStartEvent, VisemeEvent } from './events/index.mjs';
  import { InferenceSession, Tensor, Env } from 'onnxruntime-common';
-
- /**
-  * Type-safe event emitter for Omote core events
-  *
-  * @category Events
-  */
- type EventCallback<T = unknown> = (data: T) => void;
- declare class EventEmitter<TEvents extends {
-     [key: string]: unknown;
- }> {
-     private listeners;
-     on<K extends keyof TEvents>(event: K, callback: EventCallback<TEvents[K]>): () => void;
-     off<K extends keyof TEvents>(event: K, callback: EventCallback<TEvents[K]>): void;
-     emit<K extends keyof TEvents>(event: K, data: TEvents[K]): void;
-     once<K extends keyof TEvents>(event: K, callback: EventCallback<TEvents[K]>): () => void;
-     removeAllListeners(event?: keyof TEvents): void;
- }
-
- /**
-  * Core Omote event types - the contract between core and renderers
-  *
-  * Renderers subscribe to these events and apply them to their specific
-  * rendering system (R3F, Three.js, Babylon, Unity, etc.)
-  */
- /** Animation frame with blendshape weights */
- interface AnimationEvent {
-     /** 52 ARKit blendshape weights (0-1 range) */
-     blendshapes: Float32Array;
-     /** Named blendshape access */
-     get(name: string): number;
-     /** Raw model output weights (for debugging) */
-     rawWeights?: Float32Array;
-     /** Timestamp in ms */
-     timestamp: number;
-     /** Inference latency in ms */
-     inferenceMs: number;
-     /** Frame index within the current batch (for LAM multi-frame output) */
-     frameIndex?: number;
-     /** Total frames in the current batch (for LAM multi-frame output) */
-     totalFrames?: number;
- }
- /** Viseme for lip sync */
- interface VisemeEvent {
-     /** Viseme ID or phoneme */
-     viseme: string;
-     /** Weight 0-1 */
-     weight: number;
-     /** Duration in ms */
-     duration: number;
- }
- /** Emotion state change */
- interface EmotionEvent {
-     /** Emotion weights by name */
-     values: Record<string, number>;
-     /** Transition duration in ms */
-     transitionMs: number;
- }
- /** Gaze target change */
- interface GazeEvent {
-     /** Target type */
-     target: 'camera' | 'wander' | 'position';
-     /** Position if target is 'position' */
-     position?: {
-         x: number;
-         y: number;
-         z: number;
-     };
- }
- /** Audio playback events */
- interface TTSStartEvent {
-     /** Audio duration in ms */
-     durationMs: number;
-     /** Text being spoken */
-     text: string;
- }
- interface TTSMarkEvent {
-     /** Mark name/type */
-     name: string;
-     /** Time offset in ms */
-     timeMs: number;
- }
- interface TTSEndEvent {
-     /** Whether playback completed normally */
-     completed: boolean;
- }
- /** STT transcription events */
- interface STTPartialEvent {
-     /** Partial transcription */
-     text: string;
-     /** Confidence 0-1 */
-     confidence: number;
- }
- interface STTFinalEvent {
-     /** Final transcription */
-     text: string;
-     /** Confidence 0-1 */
-     confidence: number;
- }
- /** Session state events */
- interface SessionStateEvent {
-     state: 'connecting' | 'connected' | 'ready' | 'streaming' | 'error' | 'disconnected';
-     error?: Error;
- }
- /** Backend info */
- interface BackendEvent {
-     type: 'webgpu' | 'wasm' | 'remote';
-     modelLoaded: boolean;
-     loadTimeMs?: number;
- }
- /** AI adapter state */
- type AISessionState$1 = 'idle' | 'listening' | 'thinking' | 'speaking' | 'interrupted' | 'error' | 'disconnected';
- /** AI state change event */
- interface AIStateChangeEvent {
-     state: AISessionState$1;
-     previousState: AISessionState$1;
- }
- /** User speech events */
- interface UserSpeechStartEvent {
-     timestamp: number;
- }
- interface UserSpeechEndEvent {
-     timestamp: number;
-     durationMs: number;
- }
- interface UserTranscriptEvent {
-     text: string;
-     confidence: number;
- }
- /** AI response events */
- interface AIThinkingStartEvent {
-     timestamp: number;
- }
- interface AIResponseStartEvent {
-     text?: string;
-     emotion?: string;
- }
- interface AIResponseChunkEvent {
-     text: string;
-     isLast: boolean;
- }
- interface AIResponseEndEvent {
-     fullText: string;
-     durationMs: number;
- }
- /** Audio output events (for lip sync processing) */
- interface AudioOutputChunkEvent {
-     audio: ArrayBuffer;
-     sampleRate: number;
-     timestamp: number;
- }
- interface AudioOutputEndEvent {
-     durationMs: number;
- }
- /** Adapter events */
- interface AdapterSwitchEvent {
-     from: string;
-     to: string;
-     reason: string;
- }
- interface AdapterFallbackEvent {
-     adapter: string;
-     reason: string;
- }
- interface InterruptionEvent {
-     timestamp: number;
-     action?: 'stop' | 'continue';
- }
- /**
-  * Complete event map for OmoteCore
-  */
- type OmoteEvents = {
-     'animation': AnimationEvent;
-     'animation.ready': {
-         backend: 'webgpu' | 'wasm';
-     };
-     'viseme': VisemeEvent;
-     'emotion': EmotionEvent;
-     'gaze': GazeEvent;
-     'tts.start': TTSStartEvent;
-     'tts.mark': TTSMarkEvent;
-     'tts.end': TTSEndEvent;
-     'stt.partial': STTPartialEvent;
-     'stt.final': STTFinalEvent;
-     'session.state': SessionStateEvent;
-     'backend': BackendEvent;
-     'audio.chunk': {
-         pcm: Int16Array;
-         timestamp: number;
-     };
-     'audio.level': {
-         rms: number;
-         peak: number;
-     };
-     'audio.output.chunk': AudioOutputChunkEvent;
-     'audio.output.end': AudioOutputEndEvent;
-     'ai.state.change': AIStateChangeEvent;
-     'ai.thinking.start': AIThinkingStartEvent;
-     'ai.response.start': AIResponseStartEvent;
-     'ai.response.chunk': AIResponseChunkEvent;
-     'ai.response.end': AIResponseEndEvent;
-     'user.speech.start': UserSpeechStartEvent;
-     'user.speech.end': UserSpeechEndEvent;
-     'user.transcript.partial': UserTranscriptEvent;
-     'user.transcript.final': UserTranscriptEvent;
-     'adapter.switch': AdapterSwitchEvent;
-     'adapter.fallback': AdapterFallbackEvent;
-     'adapter.recovered': {
-         adapter: string;
-     };
-     'interruption.detected': InterruptionEvent;
-     'interruption.handled': InterruptionEvent;
-     'memory.updated': {
-         messageCount: number;
-         tokenCount?: number;
-     };
-     'connection.opened': {
-         sessionId: string;
-         adapter?: string;
-     };
-     'connection.closed': {
-         reason: string;
-     };
-     'connection.error': {
-         error: Error;
-         recoverable: boolean;
-     };
-     'error': {
-         code: string;
-         message: string;
-         details?: unknown;
-     };
- };
+ export { D as DEFAULT_LOGGING_CONFIG, I as ILogger, e as LOG_LEVEL_PRIORITY, b as LogEntry, L as LogFormatter, a as LogLevel, c as LogSink, d as LoggingConfig, f as configureLogging, i as createLogger, g as getLoggingConfig, n as noopLogger, r as resetLoggingConfig, s as setLogLevel, h as setLoggingEnabled } from './Logger-I_k4sGhM.mjs';

  /**
   * Microphone capture - renderer-agnostic audio input
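
The event contract deleted in this hunk moved to `./events/index.mjs` and is re-exported from the root entry, so its shape is unchanged. A minimal consumer sketch under that assumption; the emitter instance and the `applyToRig` hook are hypothetical stand-ins, not part of the package:

```typescript
// Sketch only: assumes '@omote/core' keeps exposing EventEmitter and the
// event typings shown in this hunk. `applyToRig` is a hypothetical hook.
import { EventEmitter, type OmoteEvents, type AnimationEvent } from '@omote/core';

declare function applyToRig(weights: Float32Array, jawOpen: number): void; // hypothetical

const core = new EventEmitter<OmoteEvents>();

// Per the declarations above, on() returns an unsubscribe function.
const stop = core.on('animation', (frame: AnimationEvent) => {
  // 52 ARKit blendshape weights in the 0-1 range, with named access via get()
  applyToRig(frame.blendshapes, frame.get('jawOpen'));
});

core.on('session.state', ({ state, error }) => {
  if (state === 'error') console.error('session failed', error);
});

stop(); // detach the animation listener, e.g. when the renderer unmounts
```
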
@@ -3276,138 +3047,6 @@ declare function scanForInvalidCaches(): Promise<{
   */
  declare function nukeBrowserCaches(preventRecreation?: boolean): Promise<number>;

- /**
-  * Logging types for Omote SDK
-  *
-  * 6-level logging system with structured output:
-  * - error: Critical failures that prevent operation
-  * - warn: Recoverable issues or degraded performance
-  * - info: Key lifecycle events (model loaded, inference complete)
-  * - debug: Detailed operational info for development
-  * - trace: Fine-grained tracing for performance analysis
-  * - verbose: Extremely detailed output (tensor shapes, intermediate values)
-  */
- type LogLevel = 'error' | 'warn' | 'info' | 'debug' | 'trace' | 'verbose';
- /**
-  * Numeric priority for log levels (lower = more severe)
-  */
- declare const LOG_LEVEL_PRIORITY: Record<LogLevel, number>;
- /**
-  * Structured log entry
-  */
- interface LogEntry {
-     /** Unix timestamp in milliseconds */
-     timestamp: number;
-     /** Log level */
-     level: LogLevel;
-     /** Module name (e.g., 'LocalInference', 'ModelCache') */
-     module: string;
-     /** Human-readable message */
-     message: string;
-     /** Optional structured data */
-     data?: Record<string, unknown>;
-     /** Optional error object */
-     error?: Error;
- }
- /**
-  * Log output sink interface
-  */
- interface LogSink {
-     (entry: LogEntry): void;
- }
- /**
-  * Log formatter interface
-  */
- interface LogFormatter {
-     (entry: LogEntry): string;
- }
- /**
-  * Global logging configuration
-  */
- interface LoggingConfig {
-     /** Minimum log level to output (default: 'info') */
-     level: LogLevel;
-     /** Enable/disable logging globally (default: true) */
-     enabled: boolean;
-     /** Output format: 'json' for structured, 'pretty' for human-readable */
-     format: 'json' | 'pretty';
-     /** Custom output sink (default: console) */
-     sink?: LogSink;
-     /** Include timestamps in output (default: true) */
-     timestamps?: boolean;
-     /** Include module name in output (default: true) */
-     includeModule?: boolean;
- }
- /**
-  * Logger interface for module-specific logging
-  */
- interface ILogger {
-     error(message: string, data?: Record<string, unknown>): void;
-     warn(message: string, data?: Record<string, unknown>): void;
-     info(message: string, data?: Record<string, unknown>): void;
-     debug(message: string, data?: Record<string, unknown>): void;
-     trace(message: string, data?: Record<string, unknown>): void;
-     verbose(message: string, data?: Record<string, unknown>): void;
-     /** Create a child logger with a sub-module name */
-     child(subModule: string): ILogger;
-     /** Get the module name for this logger */
-     readonly module: string;
- }
- /**
-  * Default configuration
-  */
- declare const DEFAULT_LOGGING_CONFIG: LoggingConfig;
-
- /**
-  * Omote SDK Logger
-  *
-  * Unified logging system with:
-  * - 6 log levels (error, warn, info, debug, trace, verbose)
-  * - Structured JSON output for machine parsing
-  * - Pretty output for human readability
-  * - Module-based child loggers
-  * - Runtime configuration
-  * - Browser and Node.js compatible
-  */
-
- /**
-  * Configure global logging settings
-  */
- declare function configureLogging(config: Partial<LoggingConfig>): void;
- /**
-  * Get current logging configuration
-  */
- declare function getLoggingConfig(): LoggingConfig;
- /**
-  * Reset logging configuration to defaults
-  */
- declare function resetLoggingConfig(): void;
- /**
-  * Set log level at runtime
-  */
- declare function setLogLevel(level: LogLevel): void;
- /**
-  * Enable or disable logging
-  */
- declare function setLoggingEnabled(enabled: boolean): void;
- /**
-  * Create a logger for a specific module
-  *
-  * @param module - Module name (e.g., 'LocalInference', 'ModelCache')
-  * @returns Logger instance
-  *
-  * @example
-  * ```typescript
-  * const logger = createLogger('LocalInference');
-  * logger.info('Model loaded', { backend: 'webgpu', loadTimeMs: 1234 });
-  * ```
-  */
- declare function createLogger(module: string): ILogger;
- /**
-  * No-op logger for when logging is completely disabled
-  */
- declare const noopLogger: ILogger;
-
  /**
   * Telemetry Types
   *
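
The logging declarations deleted in this hunk were relocated to the bundled `Logger-I_k4sGhM.mjs` chunk and re-exported from the root entry in the first hunk, so the declared API surface is unchanged. A sketch of the flow these declarations describe, assuming those root re-exports; `shipToCollector` is a hypothetical transport:

```typescript
// Sketch only: assumes '@omote/core' still re-exports the logging API,
// as the `export ... from './Logger-I_k4sGhM.mjs'` line above indicates.
import { configureLogging, createLogger, setLogLevel, type LogEntry } from '@omote/core';

declare function shipToCollector(payload: string): void; // hypothetical transport

// configureLogging takes Partial<LoggingConfig>; omitted fields keep their defaults.
configureLogging({
  level: 'debug',
  format: 'json',
  // Custom sink instead of the default console output.
  sink: (entry: LogEntry) => shipToCollector(JSON.stringify(entry)),
});

const logger = createLogger('LocalInference');
logger.info('Model loaded', { backend: 'webgpu', loadTimeMs: 1234 });

// child() namespaces a sub-module under the parent logger.
const cacheLog = logger.child('ModelCache');
cacheLog.debug('Cache hit', { key: 'wav2vec2-base' }); // hypothetical cache key

setLogLevel('warn'); // raise the threshold at runtime
```
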
@@ -4170,4 +3809,4 @@ declare class EmphasisDetector {
      reset(): void;
  }

- export { type AIAdapter, type AIAdapterEvents, type AISessionState, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, type AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendEvent, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, DEFAULT_ANIMATION_CONFIG, DEFAULT_LOGGING_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionEvent, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, type GazeEvent, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, type ILogger, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, LOG_LEVEL_PRIORITY, type LogEntry, type LogFormatter, type LogLevel, type LogSink, type LoggingConfig, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, type OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type STTFinalEvent, type STTPartialEvent, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SessionStateEvent, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TTSEndEvent, type TTSMarkEvent, type TTSStartEvent, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VisemeEvent, type VoiceConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureLogging, configureTelemetry, createEmotionVector, createLogger, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getLoggingConfig, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, noopLogger, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, resetLoggingConfig, resolveBackend, scanForInvalidCaches, setLogLevel, setLoggingEnabled, shouldEnableWasmProxy, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, validateCachedResponse };
+ export { type AIAdapter, type AIAdapterEvents, type AISessionState, type ActiveSpan, AgentCoreAdapter, type AgentCoreConfig, type AnimationClip, AnimationEvent, AnimationGraph, type AnimationGraphConfig, type AnimationGraphEvents, type AnimationLayer, type AnimationOutput, type AnimationState, type AnimationStateName, type AnimationTrigger, AudioChunkCoalescer, type AudioChunkCoalescerOptions, AudioEnergyAnalyzer, AudioScheduler, type AudioSchedulerOptions, type AudioSyncConfig, type AudioSyncEvents, AudioSyncManager, type BackendPreference, type BlendWeight, CTC_VOCAB, type CacheConfig, type CacheSpanAttributes, ConsoleExporter, type ConversationMessage, ConversationOrchestrator, type ConversationSession, DEFAULT_ANIMATION_CONFIG, EMOTION_NAMES, EMOTION_VECTOR_SIZE, type EmotionAnimationMap, EmotionController, type EmotionLabel, type EmotionName, type EmotionPresetName, EmotionPresets, type EmotionWeights, EmphasisDetector, EventEmitter, type FetchWithCacheOptions, HF_CDN_TEST_URL, type HuggingFaceUrlInfo, INFERENCE_LATENCY_BUCKETS, type InferenceSpanAttributes, type InterruptionConfig, type InterruptionEvents, InterruptionHandler, type LAMFrame, LAMPipeline, type LAMPipelineOptions, LAM_BLENDSHAPES, MODEL_LOAD_TIME_BUCKETS, type MessageRole, type MetricData, MetricNames, MicrophoneCapture, type MicrophoneCaptureConfig, ModelCache, type ModelSpanAttributes, OTLPExporter, type OTLPExporterConfig, OmoteEvents, OmoteTelemetry, type OrchestratorConfig, type OrchestratorEvents, type QuotaInfo, RingBuffer, type RuntimeBackend, type SafariSpeechConfig, SafariSpeechRecognition, type SamplingConfig, type SessionConfig, type SessionOptions, type SessionSnapshot, type SileroVADBackend, type SileroVADConfig, type SileroVADFactoryConfig, SileroVADInference, SileroVADWorker, type SpanAttributes, type SpanData, type SpeechErrorCallback, type SpeechRecognitionResult, type SpeechResultCallback, type SpeechSegment, SyncedAudioPipeline, type SyncedAudioPipelineEvents, type SyncedAudioPipelineOptions, type TelemetryConfig, type TelemetryExporter, type TelemetryExporterInterface, type TenantConfig, TenantManager, type TenantQuota, type TenantUsage, type TokenRefreshCallback, type TranscriptionResult, type Transition, type VADBackend, type VADModelInfo, type VADResult$1 as VADResult, type VADWorkerConfig, type VADWorkerModelInfo, type ValidationResult, type VoiceConfig, Wav2Vec2Inference, type Wav2Vec2InferenceConfig, type Wav2Vec2Result, type WhisperConfig, WhisperInference, type WhisperModel, blendEmotions, calculatePeak, calculateRMS, clearSpecificCache, clearTransformersCache, configureCacheLimit, configureTelemetry, createEmotionVector, createSessionWithFallback, createSileroVAD, fetchWithCache, formatBytes, getCacheConfig, getCacheKey, getEmotionPreset, getLoadedBackend, getModelCache, getOnnxRuntime, getOnnxRuntimeForPreference, getOptimalWasmThreads, getRecommendedBackend, getSessionOptions, getTelemetry, hasWebGPUApi, isAndroid, isHuggingFaceCDNReachable, isIOS, isIOSSafari, isMobile, isOnnxRuntimeLoaded, isSpeechRecognitionAvailable, isWebGPUAvailable, lerpEmotion, listCaches, nukeBrowserCaches, parseHuggingFaceUrl, preloadModels, resolveBackend, scanForInvalidCaches, shouldEnableWasmProxy, shouldUseNativeASR, shouldUseServerLipSync, supportsVADWorker, validateCachedResponse };
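
Net effect of the release: the event and logging declarations moved into internal chunk files, while the root entry exports the same public names in both versions (a few, such as `AnimationEvent` and `OmoteEvents`, lose their `type`-only modifier, which likely just reflects how the bundler emits the re-export). Root-entry imports like the following sketch should therefore compile against either version; whether `./events` is also a public subpath depends on the package's `exports` map, which this diff does not show:

```typescript
// Sketch only: every name here is exported from the root entry in both the
// 0.1.0 and 0.1.2 declarations; nothing relies on a subpath import.
import {
  EventEmitter,
  createLogger,
  type AnimationEvent,
  type BackendEvent,
  type LoggingConfig,
  type OmoteEvents,
} from '@omote/core';
```
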