@omote/core 0.6.6 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -16
- package/dist/{Logger-I_k4sGhM.d.mts → Logger-DSoGAYJu.d.mts} +1 -1
- package/dist/{Logger-I_k4sGhM.d.ts → Logger-DSoGAYJu.d.ts} +1 -1
- package/dist/index.d.mts +1744 -972
- package/dist/index.d.ts +1744 -972
- package/dist/index.js +5293 -2735
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +6454 -3896
- package/dist/index.mjs.map +1 -1
- package/dist/logging/index.d.mts +2 -2
- package/dist/logging/index.d.ts +2 -2
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -5,10 +5,13 @@
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
7
|
- **Lip Sync (A2E)** — Audio to 52 ARKit blendshapes via Wav2Vec2, with automatic GPU/CPU platform detection
|
|
8
|
-
- **
|
|
8
|
+
- **PlaybackPipeline** — TTS audio playback with lip sync, ExpressionProfile scaling, and gapless scheduling
|
|
9
9
|
- **Speech Recognition** — SenseVoice ASR (ONNX), 15x faster than Whisper, progressive transcription
|
|
10
10
|
- **Voice Activity Detection** — Silero VAD with Worker and main-thread modes
|
|
11
|
-
- **Text-to-Speech** —
|
|
11
|
+
- **Text-to-Speech** — Kokoro TTS (82M q8, experimental) with TTSBackend interface for custom engines
|
|
12
|
+
- **CharacterController** — Renderer-agnostic avatar composition (compositor + gaze + life layer)
|
|
13
|
+
- **TTSPlayback** — Composes TTSBackend + PlaybackPipeline for text → lip sync
|
|
14
|
+
- **VoicePipeline** — Full conversational agent loop with local TTS support (cloud or offline)
|
|
12
15
|
- **Animation Graph** — State machine (idle/listening/thinking/speaking) with emotion blending
|
|
13
16
|
- **Emotion Controller** — Preset-based emotion system with smooth transitions
|
|
14
17
|
- **Model Caching** — IndexedDB with versioning, LRU eviction, and quota monitoring
|
|
@@ -27,34 +30,33 @@ Peer dependency: `onnxruntime-web` is included — no additional installs needed
|
|
|
27
30
|
|
|
28
31
|
## Quick Start
|
|
29
32
|
|
|
30
|
-
###
|
|
33
|
+
### PlaybackPipeline (TTS Lip Sync)
|
|
31
34
|
|
|
32
35
|
The most common use case: feed TTS audio chunks and get back 52 ARKit blendshape frames at render rate.
|
|
33
36
|
|
|
34
37
|
```typescript
|
|
35
|
-
import {
|
|
38
|
+
import { PlaybackPipeline, createA2E } from '@omote/core';
|
|
36
39
|
|
|
37
40
|
// 1. Create and load the A2E backend
|
|
38
41
|
const lam = createA2E(); // auto-detects GPU vs CPU, fetches from HF CDN (192MB fp16)
|
|
39
42
|
await lam.load();
|
|
40
43
|
|
|
41
44
|
// 2. Create pipeline with expression profile
|
|
42
|
-
const pipeline = new
|
|
45
|
+
const pipeline = new PlaybackPipeline({
|
|
43
46
|
lam,
|
|
44
47
|
sampleRate: 16000,
|
|
45
48
|
profile: { mouth: 1.0, jaw: 1.0, brows: 0.6, eyes: 0.0, cheeks: 0.5, nose: 0.3, tongue: 0.5 },
|
|
46
49
|
});
|
|
47
|
-
await pipeline.initialize();
|
|
48
50
|
|
|
49
51
|
// 3. Listen for blendshape frames
|
|
50
|
-
pipeline.on('
|
|
52
|
+
pipeline.on('frame', (frame) => {
|
|
51
53
|
applyToAvatar(frame.blendshapes); // ExpressionProfile-scaled, 52 ARKit weights
|
|
52
54
|
});
|
|
53
55
|
|
|
54
56
|
// 4. Feed TTS audio and play
|
|
55
57
|
pipeline.start();
|
|
56
|
-
|
|
57
|
-
|
|
58
|
+
pipeline.feedBuffer(ttsAudioChunk); // Uint8Array PCM16
|
|
59
|
+
pipeline.end(); // Flush remaining audio
|
|
58
60
|
```
|
|
59
61
|
|
|
60
62
|
## API Reference
|
|
@@ -87,29 +89,28 @@ const { blendshapes } = await lam.infer(audioSamples);
|
|
|
87
89
|
const jawOpen = blendshapes[LAM_BLENDSHAPES.indexOf('jawOpen')];
|
|
88
90
|
```
|
|
89
91
|
|
|
90
|
-
###
|
|
92
|
+
### PlaybackPipeline
|
|
91
93
|
|
|
92
94
|
End-to-end TTS playback with lip sync inference, audio scheduling, and ExpressionProfile scaling.
|
|
93
95
|
|
|
94
96
|
```typescript
|
|
95
|
-
import {
|
|
97
|
+
import { PlaybackPipeline } from '@omote/core';
|
|
96
98
|
|
|
97
|
-
const pipeline = new
|
|
99
|
+
const pipeline = new PlaybackPipeline({
|
|
98
100
|
lam, // A2E backend from createA2E()
|
|
99
101
|
sampleRate: 16000,
|
|
100
102
|
profile: { mouth: 1.0, jaw: 1.0, brows: 0.6, eyes: 0.0, cheeks: 0.5, nose: 0.3, tongue: 0.5 },
|
|
101
103
|
});
|
|
102
|
-
await pipeline.initialize();
|
|
103
104
|
|
|
104
|
-
pipeline.on('
|
|
105
|
+
pipeline.on('frame', (frame) => {
|
|
105
106
|
// frame.blendshapes — ExpressionProfile-scaled
|
|
106
107
|
// frame.rawBlendshapes — unscaled original values
|
|
107
108
|
applyToAvatar(frame.blendshapes);
|
|
108
109
|
});
|
|
109
110
|
|
|
110
111
|
pipeline.start();
|
|
111
|
-
|
|
112
|
-
|
|
112
|
+
pipeline.feedBuffer(chunk); // feed TTS audio (Uint8Array PCM16)
|
|
113
|
+
pipeline.end(); // flush final partial chunk
|
|
113
114
|
```
|
|
114
115
|
|
|
115
116
|
### A2EProcessor
|
|
@@ -138,4 +138,4 @@ declare const noopLogger: ILogger;
|
|
|
138
138
|
*/
|
|
139
139
|
declare function getNoopLogger(): ILogger;
|
|
140
140
|
|
|
141
|
-
export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L,
|
|
141
|
+
export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
|
|
@@ -138,4 +138,4 @@ declare const noopLogger: ILogger;
|
|
|
138
138
|
*/
|
|
139
139
|
declare function getNoopLogger(): ILogger;
|
|
140
140
|
|
|
141
|
-
export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L,
|
|
141
|
+
export { DEFAULT_LOGGING_CONFIG as D, type ILogger as I, type LogFormatter as L, LOG_LEVEL_PRIORITY as a, type LogEntry as b, type LogLevel as c, type LogSink as d, type LoggingConfig as e, clearLoggerCache as f, configureLogging as g, createLogger as h, getLoggingConfig as i, getNoopLogger as j, setLoggingEnabled as k, noopLogger as n, resetLoggingConfig as r, setLogLevel as s };
|