@omote/core 0.6.4 → 0.7.1

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,10 +5,13 @@
5
5
  ## Features
6
6
 
7
7
  - **Lip Sync (A2E)** — Audio to 52 ARKit blendshapes via Wav2Vec2, with automatic GPU/CPU platform detection
8
- - **Full-Face Pipeline** — TTS audio playback to lip sync with ExpressionProfile scaling, gapless scheduling
8
+ - **PlaybackPipeline** — TTS audio playback to lip sync with ExpressionProfile scaling, gapless scheduling
9
9
  - **Speech Recognition** — SenseVoice ASR (ONNX), 15x faster than Whisper, progressive transcription
10
10
  - **Voice Activity Detection** — Silero VAD with Worker and main-thread modes
11
- - **Text-to-Speech** — ChatterboxTurbo (experimental, use server-side TTS for production)
11
+ - **Text-to-Speech** — Kokoro TTS (82M q8, experimental) with TTSBackend interface for custom engines
12
+ - **CharacterController** — Renderer-agnostic avatar composition (compositor + gaze + life layer)
13
+ - **TTSPlayback** — Composes TTSBackend + PlaybackPipeline for text → lip sync
14
+ - **VoicePipeline** — Full conversational agent loop with local TTS support (cloud or offline)
12
15
  - **Animation Graph** — State machine (idle/listening/thinking/speaking) with emotion blending
13
16
  - **Emotion Controller** — Preset-based emotion system with smooth transitions
14
17
  - **Model Caching** — IndexedDB with versioning, LRU eviction, and quota monitoring
@@ -27,34 +30,33 @@ Peer dependency: `onnxruntime-web` is included — no additional installs needed
27
30
 
28
31
  ## Quick Start
29
32
 
30
- ### FullFacePipeline (TTS Lip Sync)
33
+ ### PlaybackPipeline (TTS Lip Sync)
31
34
 
32
35
  The most common use case: feed TTS audio chunks and get back 52 ARKit blendshape frames at render rate.
33
36
 
34
37
  ```typescript
35
- import { FullFacePipeline, createA2E } from '@omote/core';
38
+ import { PlaybackPipeline, createA2E } from '@omote/core';
36
39
 
37
40
  // 1. Create A2E backend (auto-detects GPU vs CPU)
38
41
  const lam = createA2E(); // auto-detects GPU vs CPU, fetches from HF CDN (192MB fp16)
39
42
  await lam.load();
40
43
 
41
44
  // 2. Create pipeline with expression profile
42
- const pipeline = new FullFacePipeline({
45
+ const pipeline = new PlaybackPipeline({
43
46
  lam,
44
47
  sampleRate: 16000,
45
48
  profile: { mouth: 1.0, jaw: 1.0, brows: 0.6, eyes: 0.0, cheeks: 0.5, nose: 0.3, tongue: 0.5 },
46
49
  });
47
- await pipeline.initialize();
48
50
 
49
51
  // 3. Listen for blendshape frames
50
- pipeline.on('full_frame_ready', (frame) => {
52
+ pipeline.on('frame', (frame) => {
51
53
  applyToAvatar(frame.blendshapes); // ExpressionProfile-scaled, 52 ARKit weights
52
54
  });
53
55
 
54
56
  // 4. Feed TTS audio and play
55
57
  pipeline.start();
56
- await pipeline.onAudioChunk(ttsAudioChunk); // Uint8Array PCM16
57
- await pipeline.end(); // Flush remaining audio
58
+ pipeline.feedBuffer(ttsAudioChunk); // Uint8Array PCM16
59
+ pipeline.end(); // Flush remaining audio
58
60
  ```
59
61
 
60
62
  ## API Reference
@@ -87,29 +89,28 @@ const { blendshapes } = await lam.infer(audioSamples);
87
89
  const jawOpen = blendshapes[LAM_BLENDSHAPES.indexOf('jawOpen')];
88
90
  ```
89
91
 
90
- ### FullFacePipeline
92
+ ### PlaybackPipeline
91
93
 
92
94
  End-to-end TTS playback with lip sync inference, audio scheduling, and ExpressionProfile scaling.
93
95
 
94
96
  ```typescript
95
- import { FullFacePipeline } from '@omote/core';
97
+ import { PlaybackPipeline } from '@omote/core';
96
98
 
97
- const pipeline = new FullFacePipeline({
99
+ const pipeline = new PlaybackPipeline({
98
100
  lam, // A2E backend from createA2E()
99
101
  sampleRate: 16000,
100
102
  profile: { mouth: 1.0, jaw: 1.0, brows: 0.6, eyes: 0.0, cheeks: 0.5, nose: 0.3, tongue: 0.5 },
101
103
  });
102
- await pipeline.initialize();
103
104
 
104
- pipeline.on('full_frame_ready', (frame) => {
105
+ pipeline.on('frame', (frame) => {
105
106
  // frame.blendshapes — ExpressionProfile-scaled
106
107
  // frame.rawBlendshapes — unscaled original values
107
108
  applyToAvatar(frame.blendshapes);
108
109
  });
109
110
 
110
111
  pipeline.start();
111
- await pipeline.onAudioChunk(chunk); // feed TTS audio (Uint8Array PCM16)
112
- await pipeline.end(); // flush final partial chunk
112
+ pipeline.feedBuffer(chunk); // feed TTS audio (Uint8Array PCM16)
113
+ pipeline.end(); // flush final partial chunk
113
114
  ```
114
115
 
115
116
  ### A2EProcessor
@@ -138,4 +138,4 @@ declare const noopLogger: ILogger;
138
138
  */
139
139
  declare function getNoopLogger(): ILogger;
140
140
 
141
- export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, type LogLevel as a, type LogEntry as b, type LogSink as c, type LoggingConfig as d, LOG_LEVEL_PRIORITY as e, configureLogging as f, getLoggingConfig as g, setLoggingEnabled as h, createLogger as i, clearLoggerCache as j, getNoopLogger as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
141
+ export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
@@ -138,4 +138,4 @@ declare const noopLogger: ILogger;
138
138
  */
139
139
  declare function getNoopLogger(): ILogger;
140
140
 
141
- export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, type LogLevel as a, type LogEntry as b, type LogSink as c, type LoggingConfig as d, LOG_LEVEL_PRIORITY as e, configureLogging as f, getLoggingConfig as g, setLoggingEnabled as h, createLogger as i, clearLoggerCache as j, getNoopLogger as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
141
+ export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };