@omote/core 0.6.6 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,11 +4,18 @@
4
4
 
5
5
  ## Features
6
6
 
7
- - **Lip Sync (A2E)** — Audio to 52 ARKit blendshapes via Wav2Vec2, with automatic GPU/CPU platform detection
8
- - **Full-Face Pipeline** — TTS audio playback to lip sync with ExpressionProfile scaling, gapless scheduling
7
+ - **Lip Sync (A2E)** — Audio to 52 ARKit blendshapes via LAM, with automatic WebGPU/WASM platform detection
8
+ - **PlaybackPipeline** — TTS audio playback to lip sync with ExpressionProfile scaling, gapless scheduling
9
9
  - **Speech Recognition** — SenseVoice ASR (ONNX), 15x faster than Whisper, progressive transcription
10
10
  - **Voice Activity Detection** — Silero VAD with Worker and main-thread modes
11
- - **Text-to-Speech** — ChatterboxTurbo (experimental, use server-side TTS for production)
11
+ - **Text-to-Speech** — Kokoro TTS (82M q8, offline) with TTSBackend interface for custom engines
12
+ - **CharacterController** — Renderer-agnostic avatar composition (compositor + gaze + life layer)
13
+ - **TTSPlayback** — Composes TTSBackend + PlaybackPipeline for text → lip sync
14
+ - **TTSSpeaker** — High-level speak(text) with abort, queueing, and LLM streaming
15
+ - **SpeechListener** — Mic → VAD → ASR orchestration with adaptive silence detection
16
+ - **createTTSPlayer()** — Factory composing Kokoro TTS + TTSSpeaker for zero-config playback
17
+ - **VoicePipeline** — Full conversational agent loop with local TTS support (cloud or offline)
18
+ - **configureOrtCdn()** — Enterprise CDN override for ORT WASM/WebGPU binaries
12
19
  - **Animation Graph** — State machine (idle/listening/thinking/speaking) with emotion blending
13
20
  - **Emotion Controller** — Preset-based emotion system with smooth transitions
14
21
  - **Model Caching** — IndexedDB with versioning, LRU eviction, and quota monitoring
@@ -27,34 +34,33 @@ Peer dependency: `onnxruntime-web` is included — no additional installs needed
27
34
 
28
35
  ## Quick Start
29
36
 
30
- ### FullFacePipeline (TTS Lip Sync)
37
+ ### PlaybackPipeline (TTS Lip Sync)
31
38
 
32
39
  The most common use case: feed TTS audio chunks and get back 52 ARKit blendshape frames at render rate.
33
40
 
34
41
  ```typescript
35
- import { FullFacePipeline, createA2E } from '@omote/core';
42
+ import { PlaybackPipeline, createA2E } from '@omote/core';
36
43
 
37
44
  // 1. Create A2E backend (auto-detects WebGPU vs WASM)
38
45
  const lam = createA2E(); // WebGPU on Chrome/Edge, WASM on Safari/iOS/Firefox; fetches from HF CDN (192MB fp16)
39
46
  await lam.load();
40
47
 
41
48
  // 2. Create pipeline with expression profile
42
- const pipeline = new FullFacePipeline({
49
+ const pipeline = new PlaybackPipeline({
43
50
  lam,
44
51
  sampleRate: 16000,
45
52
  profile: { mouth: 1.0, jaw: 1.0, brows: 0.6, eyes: 0.0, cheeks: 0.5, nose: 0.3, tongue: 0.5 },
46
53
  });
47
- await pipeline.initialize();
48
54
 
49
55
  // 3. Listen for blendshape frames
50
- pipeline.on('full_frame_ready', (frame) => {
56
+ pipeline.on('frame', (frame) => {
51
57
  applyToAvatar(frame.blendshapes); // ExpressionProfile-scaled, 52 ARKit weights
52
58
  });
53
59
 
54
60
  // 4. Feed TTS audio and play
55
61
  pipeline.start();
56
- await pipeline.onAudioChunk(ttsAudioChunk); // Uint8Array PCM16
57
- await pipeline.end(); // Flush remaining audio
62
+ pipeline.feedBuffer(ttsAudioChunk); // Uint8Array PCM16
63
+ pipeline.end(); // Flush remaining audio
58
64
  ```
59
65
 
60
66
  ## API Reference
@@ -68,7 +74,7 @@ Auto-detects platform: Chrome/Edge/Android use WebGPU, Safari/iOS use WASM CPU f
68
74
  ```typescript
69
75
  import { createA2E } from '@omote/core';
70
76
 
71
- const a2e = createA2E(); // auto-detects: GPU (192MB fp16) or CPU (404MB WASM)
77
+ const a2e = createA2E(); // auto-detects: WebGPU on Chrome/Edge, WASM on Safari/iOS/Firefox
72
78
  await a2e.load();
73
79
 
74
80
  const { blendshapes } = await a2e.infer(audioSamples); // Float32Array (16kHz)
@@ -78,38 +84,37 @@ const { blendshapes } = await a2e.infer(audioSamples); // Float32Array (16kHz)
78
84
  #### Direct API
79
85
 
80
86
  ```typescript
81
- import { Wav2Vec2Inference, LAM_BLENDSHAPES } from '@omote/core';
87
+ import { A2EInference, ARKIT_BLENDSHAPES } from '@omote/core';
82
88
 
83
- const lam = new Wav2Vec2Inference({ modelUrl: '/models/model_fp16.onnx' });
89
+ const lam = new A2EInference({ modelUrl: '/models/model_fp16.onnx' });
84
90
  await lam.load();
85
91
 
86
92
  const { blendshapes } = await lam.infer(audioSamples);
87
- const jawOpen = blendshapes[LAM_BLENDSHAPES.indexOf('jawOpen')];
93
+ const jawOpen = blendshapes[ARKIT_BLENDSHAPES.indexOf('jawOpen')];
88
94
  ```
89
95
 
90
- ### FullFacePipeline
96
+ ### PlaybackPipeline
91
97
 
92
98
  End-to-end TTS playback with lip sync inference, audio scheduling, and ExpressionProfile scaling.
93
99
 
94
100
  ```typescript
95
- import { FullFacePipeline } from '@omote/core';
101
+ import { PlaybackPipeline } from '@omote/core';
96
102
 
97
- const pipeline = new FullFacePipeline({
103
+ const pipeline = new PlaybackPipeline({
98
104
  lam, // A2E backend from createA2E()
99
105
  sampleRate: 16000,
100
106
  profile: { mouth: 1.0, jaw: 1.0, brows: 0.6, eyes: 0.0, cheeks: 0.5, nose: 0.3, tongue: 0.5 },
101
107
  });
102
- await pipeline.initialize();
103
108
 
104
- pipeline.on('full_frame_ready', (frame) => {
109
+ pipeline.on('frame', (frame) => {
105
110
  // frame.blendshapes — ExpressionProfile-scaled
106
111
  // frame.rawBlendshapes — unscaled original values
107
112
  applyToAvatar(frame.blendshapes);
108
113
  });
109
114
 
110
115
  pipeline.start();
111
- await pipeline.onAudioChunk(chunk); // feed TTS audio (Uint8Array PCM16)
112
- await pipeline.end(); // flush final partial chunk
116
+ pipeline.feedBuffer(chunk); // feed TTS audio (Uint8Array PCM16)
117
+ pipeline.end(); // flush final partial chunk
113
118
  ```
114
119
 
115
120
  ### A2EProcessor
@@ -309,8 +314,6 @@ Place models in your public assets directory:
309
314
  ```
310
315
  public/models/
311
316
  model_fp16.onnx # A2E lip sync — WebGPU (192MB fp16, from omote-ai/lam-a2e)
312
- wav2arkit_cpu.onnx # A2E lip sync — WASM fallback (1.86MB graph)
313
- wav2arkit_cpu.onnx.data # A2E lip sync — WASM fallback (402MB weights)
314
317
  sensevoice/model.int8.onnx # SenseVoice ASR (239MB)
315
318
  silero-vad.onnx # Voice activity detection (~2MB)
316
319
  ```
@@ -338,7 +341,7 @@ const webgpu = await isWebGPUAvailable();
338
341
  All iOS browsers use WebKit under the hood. The SDK handles three platform constraints automatically:
339
342
 
340
343
  1. **WASM binary selection** — iOS crashes with the default JSEP/ASYNCIFY WASM binary. The SDK imports `onnxruntime-web/wasm` (non-JSEP) on iOS/Safari.
341
- 2. **A2E model fallback** — The Wav2Vec2 GPU model exceeds iOS memory limits. `createA2E({ mode: 'auto' })` automatically selects the `wav2arkit_cpu` model on iOS.
344
+ 2. **A2E model routing** — `createA2E()` routes all platforms through `A2EInference` via `UnifiedInferenceWorker`. WebGPU on Chrome/Edge, WASM on Safari/iOS/Firefox.
342
345
  3. **Worker memory** — Multiple Workers each load their own ORT WASM runtime, exceeding iOS tab memory (~1.5GB). The SDK defaults to main-thread inference on iOS.
343
346
 
344
347
  **Consumer requirement:** COEP/COOP headers must be skipped for iOS to avoid triggering SharedArrayBuffer (which forces threaded WASM with 4GB shared memory — crashes iOS). Desktop should keep COEP/COOP for multi-threaded performance.
@@ -347,7 +350,7 @@ All iOS browsers use WebKit under the hood. The SDK handles three platform const
347
350
  |---------|------------|-------|
348
351
  | Silero VAD | Works | 0.9ms latency |
349
352
  | SenseVoice ASR | Works | WASM, ~200ms |
350
- | A2E Lip Sync | Works | wav2arkit_cpu via createA2E auto-detect, ~45ms |
353
+ | A2E Lip Sync | Works | A2EInference (WASM) via createA2E(), ~45ms |
351
354
 
352
355
  ## License
353
356
 
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Logging types for Omote SDK
3
+ *
4
+ * 6-level logging system with structured output:
5
+ * - error: Critical failures that prevent operation
6
+ * - warn: Recoverable issues or degraded performance
7
+ * - info: Key lifecycle events (model loaded, inference complete)
8
+ * - debug: Detailed operational info for development
9
+ * - trace: Fine-grained tracing for performance analysis
10
+ * - verbose: Extremely detailed output (tensor shapes, intermediate values)
11
+ */
12
+ type LogLevel = 'error' | 'warn' | 'info' | 'debug' | 'trace' | 'verbose';
13
+ /**
14
+ * Numeric priority for log levels (lower = more severe)
15
+ */
16
+ declare const LOG_LEVEL_PRIORITY: Record<LogLevel, number>;
17
+ /**
18
+ * Structured log entry
19
+ */
20
+ interface LogEntry {
21
+ /** Unix timestamp in milliseconds */
22
+ timestamp: number;
23
+ /** Log level */
24
+ level: LogLevel;
25
+ /** Module name (e.g., 'LocalInference', 'ModelCache') */
26
+ module: string;
27
+ /** Human-readable message */
28
+ message: string;
29
+ /** Optional structured data */
30
+ data?: Record<string, unknown>;
31
+ /** Optional error object */
32
+ error?: Error;
33
+ /** Trace ID from active telemetry span (log-to-span correlation) */
34
+ traceId?: string;
35
+ /** Span ID from active telemetry span (log-to-span correlation) */
36
+ spanId?: string;
37
+ }
38
+ /**
39
+ * Log output sink interface
40
+ */
41
+ interface LogSink {
42
+ (entry: LogEntry): void;
43
+ }
44
+ /**
45
+ * Log formatter interface
46
+ */
47
+ interface LogFormatter {
48
+ (entry: LogEntry): string;
49
+ }
50
+ /**
51
+ * Global logging configuration
52
+ */
53
+ interface LoggingConfig {
54
+ /** Minimum log level to output (default: 'info') */
55
+ level: LogLevel;
56
+ /** Enable/disable logging globally (default: true) */
57
+ enabled: boolean;
58
+ /** Output format: 'json' for structured, 'pretty' for human-readable */
59
+ format: 'json' | 'pretty';
60
+ /** Custom output sink (default: console) */
61
+ sink?: LogSink;
62
+ /** Include timestamps in output (default: true) */
63
+ timestamps?: boolean;
64
+ /** Include module name in output (default: true) */
65
+ includeModule?: boolean;
66
+ }
67
+ /**
68
+ * Logger interface for module-specific logging
69
+ */
70
+ interface ILogger {
71
+ error(message: string, data?: Record<string, unknown>): void;
72
+ warn(message: string, data?: Record<string, unknown>): void;
73
+ info(message: string, data?: Record<string, unknown>): void;
74
+ debug(message: string, data?: Record<string, unknown>): void;
75
+ trace(message: string, data?: Record<string, unknown>): void;
76
+ verbose(message: string, data?: Record<string, unknown>): void;
77
+ /** Create a child logger with a sub-module name */
78
+ child(subModule: string): ILogger;
79
+ /** Get the module name for this logger */
80
+ readonly module: string;
81
+ }
82
+ /**
83
+ * Default configuration
84
+ */
85
+ declare const DEFAULT_LOGGING_CONFIG: LoggingConfig;
86
+
87
+ /**
88
+ * Omote SDK Logger
89
+ *
90
+ * Unified logging system with:
91
+ * - 6 log levels (error, warn, info, debug, trace, verbose)
92
+ * - Structured JSON output for machine parsing
93
+ * - Pretty output for human readability
94
+ * - Module-based child loggers
95
+ * - Runtime configuration
96
+ * - Browser and Node.js compatible
97
+ */
98
+
99
+ /**
100
+ * Configure global logging settings
101
+ */
102
+ declare function configureLogging(config: Partial<LoggingConfig>): void;
103
+ /**
104
+ * Get current logging configuration
105
+ */
106
+ declare function getLoggingConfig(): LoggingConfig;
107
+ /**
108
+ * Reset logging configuration to defaults
109
+ */
110
+ declare function resetLoggingConfig(): void;
111
+ /**
112
+ * Set log level at runtime
113
+ */
114
+ declare function setLogLevel(level: LogLevel): void;
115
+ /**
116
+ * Enable or disable logging
117
+ */
118
+ declare function setLoggingEnabled(enabled: boolean): void;
119
+ /**
120
+ * Create a logger for a specific module
121
+ *
122
+ * @param module - Module name (e.g., 'LocalInference', 'ModelCache')
123
+ * @returns Logger instance
124
+ *
125
+ * @example
126
+ * ```typescript
127
+ * const logger = createLogger('LocalInference');
128
+ * logger.info('Model loaded', { backend: 'webgpu', loadTimeMs: 1234 });
129
+ * ```
130
+ */
131
+ declare function createLogger(module: string): ILogger;
132
+ /**
133
+ * Clear logger cache (useful for testing)
134
+ */
135
+ declare function clearLoggerCache(): void;
136
+ /**
137
+ * No-op logger for when logging is completely disabled
138
+ */
139
+ declare const noopLogger: ILogger;
140
+ /**
141
+ * Get a no-op logger (for production builds that tree-shake logging)
142
+ */
143
+ declare function getNoopLogger(): ILogger;
144
+
145
+ export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Logging types for Omote SDK
3
+ *
4
+ * 6-level logging system with structured output:
5
+ * - error: Critical failures that prevent operation
6
+ * - warn: Recoverable issues or degraded performance
7
+ * - info: Key lifecycle events (model loaded, inference complete)
8
+ * - debug: Detailed operational info for development
9
+ * - trace: Fine-grained tracing for performance analysis
10
+ * - verbose: Extremely detailed output (tensor shapes, intermediate values)
11
+ */
12
+ type LogLevel = 'error' | 'warn' | 'info' | 'debug' | 'trace' | 'verbose';
13
+ /**
14
+ * Numeric priority for log levels (lower = more severe)
15
+ */
16
+ declare const LOG_LEVEL_PRIORITY: Record<LogLevel, number>;
17
+ /**
18
+ * Structured log entry
19
+ */
20
+ interface LogEntry {
21
+ /** Unix timestamp in milliseconds */
22
+ timestamp: number;
23
+ /** Log level */
24
+ level: LogLevel;
25
+ /** Module name (e.g., 'LocalInference', 'ModelCache') */
26
+ module: string;
27
+ /** Human-readable message */
28
+ message: string;
29
+ /** Optional structured data */
30
+ data?: Record<string, unknown>;
31
+ /** Optional error object */
32
+ error?: Error;
33
+ /** Trace ID from active telemetry span (log-to-span correlation) */
34
+ traceId?: string;
35
+ /** Span ID from active telemetry span (log-to-span correlation) */
36
+ spanId?: string;
37
+ }
38
+ /**
39
+ * Log output sink interface
40
+ */
41
+ interface LogSink {
42
+ (entry: LogEntry): void;
43
+ }
44
+ /**
45
+ * Log formatter interface
46
+ */
47
+ interface LogFormatter {
48
+ (entry: LogEntry): string;
49
+ }
50
+ /**
51
+ * Global logging configuration
52
+ */
53
+ interface LoggingConfig {
54
+ /** Minimum log level to output (default: 'info') */
55
+ level: LogLevel;
56
+ /** Enable/disable logging globally (default: true) */
57
+ enabled: boolean;
58
+ /** Output format: 'json' for structured, 'pretty' for human-readable */
59
+ format: 'json' | 'pretty';
60
+ /** Custom output sink (default: console) */
61
+ sink?: LogSink;
62
+ /** Include timestamps in output (default: true) */
63
+ timestamps?: boolean;
64
+ /** Include module name in output (default: true) */
65
+ includeModule?: boolean;
66
+ }
67
+ /**
68
+ * Logger interface for module-specific logging
69
+ */
70
+ interface ILogger {
71
+ error(message: string, data?: Record<string, unknown>): void;
72
+ warn(message: string, data?: Record<string, unknown>): void;
73
+ info(message: string, data?: Record<string, unknown>): void;
74
+ debug(message: string, data?: Record<string, unknown>): void;
75
+ trace(message: string, data?: Record<string, unknown>): void;
76
+ verbose(message: string, data?: Record<string, unknown>): void;
77
+ /** Create a child logger with a sub-module name */
78
+ child(subModule: string): ILogger;
79
+ /** Get the module name for this logger */
80
+ readonly module: string;
81
+ }
82
+ /**
83
+ * Default configuration
84
+ */
85
+ declare const DEFAULT_LOGGING_CONFIG: LoggingConfig;
86
+
87
+ /**
88
+ * Omote SDK Logger
89
+ *
90
+ * Unified logging system with:
91
+ * - 6 log levels (error, warn, info, debug, trace, verbose)
92
+ * - Structured JSON output for machine parsing
93
+ * - Pretty output for human readability
94
+ * - Module-based child loggers
95
+ * - Runtime configuration
96
+ * - Browser and Node.js compatible
97
+ */
98
+
99
+ /**
100
+ * Configure global logging settings
101
+ */
102
+ declare function configureLogging(config: Partial<LoggingConfig>): void;
103
+ /**
104
+ * Get current logging configuration
105
+ */
106
+ declare function getLoggingConfig(): LoggingConfig;
107
+ /**
108
+ * Reset logging configuration to defaults
109
+ */
110
+ declare function resetLoggingConfig(): void;
111
+ /**
112
+ * Set log level at runtime
113
+ */
114
+ declare function setLogLevel(level: LogLevel): void;
115
+ /**
116
+ * Enable or disable logging
117
+ */
118
+ declare function setLoggingEnabled(enabled: boolean): void;
119
+ /**
120
+ * Create a logger for a specific module
121
+ *
122
+ * @param module - Module name (e.g., 'LocalInference', 'ModelCache')
123
+ * @returns Logger instance
124
+ *
125
+ * @example
126
+ * ```typescript
127
+ * const logger = createLogger('LocalInference');
128
+ * logger.info('Model loaded', { backend: 'webgpu', loadTimeMs: 1234 });
129
+ * ```
130
+ */
131
+ declare function createLogger(module: string): ILogger;
132
+ /**
133
+ * Clear logger cache (useful for testing)
134
+ */
135
+ declare function clearLoggerCache(): void;
136
+ /**
137
+ * No-op logger for when logging is completely disabled
138
+ */
139
+ declare const noopLogger: ILogger;
140
+ /**
141
+ * Get a no-op logger (for production builds that tree-shake logging)
142
+ */
143
+ declare function getNoopLogger(): ILogger;
144
+
145
+ export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
@@ -138,4 +138,4 @@ declare const noopLogger: ILogger;
138
138
  */
139
139
  declare function getNoopLogger(): ILogger;
140
140
 
141
- export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, type LogLevel as a, type LogEntry as b, type LogSink as c, type LoggingConfig as d, LOG_LEVEL_PRIORITY as e, configureLogging as f, getLoggingConfig as g, setLoggingEnabled as h, createLogger as i, clearLoggerCache as j, getNoopLogger as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
141
+ export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
@@ -138,4 +138,4 @@ declare const noopLogger: ILogger;
138
138
  */
139
139
  declare function getNoopLogger(): ILogger;
140
140
 
141
- export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, type LogLevel as a, type LogEntry as b, type LogSink as c, type LoggingConfig as d, LOG_LEVEL_PRIORITY as e, configureLogging as f, getLoggingConfig as g, setLoggingEnabled as h, createLogger as i, clearLoggerCache as j, getNoopLogger as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
141
+ export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };