talking-head-studio 0.4.11 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +279 -193
- package/dist/TalkingHead.d.ts +28 -3
- package/dist/TalkingHead.js +21 -2
- package/dist/TalkingHead.web.d.ts +31 -4
- package/dist/TalkingHead.web.js +11 -1
- package/dist/TalkingHeadVisualization.d.ts +22 -0
- package/dist/TalkingHeadVisualization.js +30 -10
- package/dist/api/studioApi.d.ts +12 -1
- package/dist/api/studioApi.js +16 -2
- package/dist/contract.d.ts +14 -0
- package/dist/contract.js +30 -0
- package/dist/core/avatar/avatarCapabilities.d.ts +60 -0
- package/dist/core/avatar/avatarCapabilities.js +100 -0
- package/dist/core/avatar/backends/gaussian.js +6 -4
- package/dist/core/avatar/motion.d.ts +1713 -0
- package/dist/core/avatar/motion.js +550 -0
- package/dist/core/avatar/motionRuntime.d.ts +46 -0
- package/dist/core/avatar/motionRuntime.js +84 -0
- package/dist/core/avatar/schema.d.ts +33 -5
- package/dist/core/avatar/visemes.d.ts +16 -1
- package/dist/core/avatar/visemes.js +48 -1
- package/dist/editor/AvatarCanvas.js +92 -1
- package/dist/editor/AvatarEditor.native.js +1 -0
- package/dist/editor/AvatarModel.js +1 -0
- package/dist/editor/FaceSqueezeEditor.d.ts +3 -1
- package/dist/editor/FaceSqueezeEditor.js +176 -112
- package/dist/editor/FaceSqueezeEditor.web.d.ts +3 -1
- package/dist/editor/FaceSqueezeEditor.web.js +30 -28
- package/dist/editor/RigidAccessory.js +17 -2
- package/dist/editor/SkinnedClothing.js +1 -0
- package/dist/editor/boneLockedDrag.d.ts +11 -0
- package/dist/editor/boneLockedDrag.js +68 -0
- package/dist/editor/boneSnap.web.d.ts +27 -0
- package/dist/editor/boneSnap.web.js +99 -0
- package/dist/editor/index.web.d.ts +10 -0
- package/dist/editor/index.web.js +26 -0
- package/dist/editor/sounds/haha.wav +0 -0
- package/dist/editor/sounds/owie.wav +0 -0
- package/dist/editor/sounds/stop.wav +0 -0
- package/dist/editor/studioTheme.d.ts +14 -14
- package/dist/editor/studioTheme.js +17 -14
- package/dist/editor/types.d.ts +1 -0
- package/dist/html/accessories.d.ts +7 -0
- package/dist/html/accessories.js +149 -0
- package/dist/html/motion.d.ts +1 -0
- package/dist/html/motion.js +189 -0
- package/dist/html/visemes.d.ts +7 -0
- package/dist/html/visemes.js +348 -0
- package/dist/html.d.ts +1 -1
- package/dist/html.js +55 -732
- package/dist/index.d.ts +7 -3
- package/dist/index.js +17 -1
- package/dist/index.web.d.ts +18 -1
- package/dist/index.web.js +36 -3
- package/dist/sketchfab/api.js +1 -0
- package/dist/sketchfab/glbInspect.d.ts +22 -0
- package/dist/sketchfab/glbInspect.js +58 -0
- package/dist/sketchfab/index.d.ts +3 -0
- package/dist/sketchfab/index.js +8 -1
- package/dist/sketchfab/inspectRemote.d.ts +13 -0
- package/dist/sketchfab/inspectRemote.js +77 -0
- package/dist/sketchfab/types.d.ts +10 -0
- package/dist/studio/AccessoryBrowserScreen.d.ts +6 -0
- package/dist/studio/AccessoryBrowserScreen.js +626 -0
- package/dist/studio/AccessoryPanel.d.ts +10 -0
- package/dist/studio/AccessoryPanel.js +396 -0
- package/dist/studio/AppearancePanel.d.ts +9 -0
- package/dist/studio/AppearancePanel.js +77 -0
- package/dist/studio/AvatarCreatorScreen.d.ts +5 -0
- package/dist/studio/AvatarCreatorScreen.js +806 -0
- package/dist/studio/AvatarEditorScreen.d.ts +14 -0
- package/dist/studio/AvatarEditorScreen.js +510 -0
- package/dist/studio/AvatarGrid.d.ts +23 -0
- package/dist/studio/AvatarGrid.js +257 -0
- package/dist/studio/ColorSwatch.d.ts +8 -0
- package/dist/studio/ColorSwatch.js +100 -0
- package/dist/studio/CreateVoiceProfileSheet.d.ts +8 -0
- package/dist/studio/CreateVoiceProfileSheet.js +242 -0
- package/dist/studio/DetailsPanel.d.ts +15 -0
- package/dist/studio/DetailsPanel.js +239 -0
- package/dist/studio/FilamentEditor.d.ts +2 -0
- package/dist/studio/FilamentEditor.js +6 -0
- package/dist/studio/PrecisionPanel.d.ts +2 -0
- package/dist/studio/PrecisionPanel.js +7 -0
- package/dist/studio/PublicGalleryScreen.d.ts +5 -0
- package/dist/studio/PublicGalleryScreen.js +358 -0
- package/dist/studio/SketchfabModelCard.d.ts +20 -0
- package/dist/studio/SketchfabModelCard.js +104 -0
- package/dist/studio/StudioBrowseHeader.d.ts +9 -0
- package/dist/studio/StudioBrowseHeader.js +28 -0
- package/dist/studio/StudioEmptyState.d.ts +8 -0
- package/dist/studio/StudioEmptyState.js +29 -0
- package/dist/studio/StudioFloatingAction.d.ts +13 -0
- package/dist/studio/StudioFloatingAction.js +42 -0
- package/dist/studio/StudioSectionHeader.d.ts +7 -0
- package/dist/studio/StudioSectionHeader.js +27 -0
- package/dist/studio/StudioSurfaceCard.d.ts +8 -0
- package/dist/studio/StudioSurfaceCard.js +20 -0
- package/dist/studio/VoicePanel.d.ts +15 -0
- package/dist/studio/VoicePanel.js +305 -0
- package/dist/studio/constants.d.ts +3 -0
- package/dist/studio/constants.js +6 -0
- package/dist/studio/index.d.ts +29 -0
- package/dist/studio/index.js +54 -0
- package/dist/studio/useSketchfabCapabilities.d.ts +31 -0
- package/dist/studio/useSketchfabCapabilities.js +82 -0
- package/dist/tts/useDirectVisemeStream.js +15 -10
- package/dist/utils/avatarUtils.js +92 -5
- package/dist/utils/faceLandmarkerToShapeWeights.js +2 -4
- package/dist/voice/useAudioPlayer.js +17 -4
- package/dist/voice/useVoicePreview.js +4 -2
- package/dist/wardrobe/index.d.ts +1 -0
- package/dist/wardrobe/index.js +6 -1
- package/dist/wardrobe/useAccessoryGestures.d.ts +20 -0
- package/dist/wardrobe/useAccessoryGestures.js +94 -0
- package/dist/wardrobe/useAvatarWardrobeHydration.js +8 -2
- package/dist/wardrobe/useStudioAvatar.js +11 -2
- package/dist/wardrobe/wardrobeStore.d.ts +2 -0
- package/dist/wardrobe/wardrobeStore.js +12 -2
- package/dist/wgpu/R3FWebGpuCanvas.d.ts +15 -0
- package/dist/wgpu/R3FWebGpuCanvas.js +176 -0
- package/dist/wgpu/WgpuAvatar.d.ts +26 -2
- package/dist/wgpu/WgpuAvatar.js +296 -39
- package/dist/wgpu/accessoryDefaults.d.ts +12 -0
- package/dist/wgpu/accessoryDefaults.js +19 -0
- package/dist/wgpu/blobShim.d.ts +2 -0
- package/dist/wgpu/blobShim.js +191 -0
- package/dist/wgpu/index.d.ts +1 -0
- package/dist/wgpu/index.js +4 -1
- package/dist/wgpu/loadGLTFFromUri.d.ts +2 -0
- package/dist/wgpu/loadGLTFFromUri.js +75 -0
- package/dist/wgpu/morphTables.js +21 -10
- package/dist/wgpu/motionState.d.ts +20 -0
- package/dist/wgpu/motionState.js +31 -0
- package/dist/wgpu/patchThreeForRN.d.ts +28 -0
- package/dist/wgpu/patchThreeForRN.js +292 -0
- package/dist/wgpu/scenePlacement.d.ts +5 -0
- package/dist/wgpu/scenePlacement.js +50 -0
- package/dist/wgpu/useAuthedModelUri.js +4 -2
- package/dist/wgpu/useNativeGLTF.d.ts +7 -0
- package/dist/wgpu/useNativeGLTF.js +36 -0
- package/package.json +97 -31
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# talking-head-studio
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**Make any GLB model talk — on the web and on React Native — with phoneme-accurate, audio-aligned lip-sync. With or without blend shapes.**
|
|
4
4
|
|
|
5
5
|
[npm package](https://www.npmjs.com/package/talking-head-studio)
|
|
6
6
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
@@ -8,65 +8,123 @@
|
|
|
8
8
|
|
|
9
9
|
---
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## The point: lip-sync that's driven by the audio, not guessed
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
Most avatar libraries flap a jaw open in proportion to audio loudness. That reads as
|
|
14
|
+
"mouth moving," not "speaking." talking-head-studio is built around a different model: a
|
|
15
|
+
**viseme schedule** — a timed list of mouth shapes derived from the actual synthesized
|
|
16
|
+
speech — drives morph targets on the model.
|
|
14
17
|
|
|
15
|
-
|
|
18
|
+
```
|
|
19
|
+
TTS server ──▶ AgentVisemePayload ──▶ scheduleVisemes() ──▶ morph drive
|
|
20
|
+
(word-aligned { cues: [{ viseme, startMs, (this library, (Three.js
|
|
21
|
+
phonemes) endMs }], durationMs } web + native) morph targets)
|
|
22
|
+
```
|
|
16
23
|
|
|
17
|
-
|
|
24
|
+
The wire format is `AgentVisemePayload`: per-phoneme cues using the 9-shape Rhubarb
|
|
25
|
+
vocabulary (`A`–`H`, `X`), each with a start/end time in milliseconds. The library maps
|
|
26
|
+
those onto Oculus viseme morphs and schedules them against the audio clock, so the mouth
|
|
27
|
+
hits each shape *when that sound is actually heard*.
|
|
28
|
+
|
|
29
|
+
This pairs directly with a TTS server that emits viseme timings from real word alignment
|
|
30
|
+
(we built [Qwen3-TTS](https://github.com/sitebay/Qwen3-TTS) for exactly this — it serves
|
|
31
|
+
`AgentVisemePayload` over an SSE endpoint). But the format is open: emit cues from any
|
|
32
|
+
source and the renderer consumes them identically.
|
|
18
33
|
|
|
19
|
-
|
|
34
|
+
### Four lip-sync tiers — every model works
|
|
20
35
|
|
|
21
|
-
|
|
36
|
+
The model decides the fidelity; you don't have to pre-process anything.
|
|
37
|
+
|
|
38
|
+
| Your model has… | Method | Quality |
|
|
22
39
|
|---|---|---|
|
|
23
|
-
|
|
|
24
|
-
|
|
|
25
|
-
|
|
|
26
|
-
|
|
|
40
|
+
| Oculus viseme morphs | Direct morph drive (`MorphTargetBackend`) | Excellent |
|
|
41
|
+
| ARKit blend shapes (52 AUs) | `remapArkitToOculus()` → morph drive | Good |
|
|
42
|
+
| Only `jawOpen` / `mouthOpen` | Amplitude fallback | Acceptable |
|
|
43
|
+
| No face rig at all | Gaussian splat backend *(roadmap — not yet built)* | Excellent *(target; not available yet)* |
|
|
27
44
|
|
|
28
|
-
|
|
45
|
+
If a model has no viseme morphs, scheduled cues still fall back to the jaw/amplitude path
|
|
46
|
+
automatically — you never get a frozen face.
|
|
29
47
|
|
|
30
48
|
---
|
|
31
49
|
|
|
32
|
-
##
|
|
50
|
+
## Two renderers, one contract
|
|
33
51
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
AvatarBackend ←────────────── swap without changing anything upstream
|
|
42
|
-
├── MorphTargetBackend ← Three.js morph targets (GLB with blend shapes)
|
|
43
|
-
├── GaussianBackend ← [roadmap] Gaussian splat + FLAME delta transfer
|
|
44
|
-
└── (your backend) ← implement AvatarBackend, plug in
|
|
45
|
-
↓
|
|
46
|
-
Renderer
|
|
47
|
-
├── Web iframe ← TalkingHead.web.tsx (any React app)
|
|
48
|
-
├── React Native wgpu ← WgpuAvatar (native GPU, no WebView latency)
|
|
49
|
-
└── Unity / Unreal ← [roadmap] SDK plugins consuming same contracts
|
|
50
|
-
```
|
|
52
|
+
The same `AgentVisemePayload` / `FaceControl` contract drives both render paths, so you
|
|
53
|
+
write your voice pipeline once:
|
|
54
|
+
|
|
55
|
+
- **Web** — an isolated `<iframe>` running [met4citizen TalkingHead](https://github.com/met4citizen/TalkingHead)
|
|
56
|
+
as the rig (`TalkingHead.web.tsx`). Drop it into any React / Next / Vite app.
|
|
57
|
+
- **React Native** — a native WebGPU renderer (`WgpuAvatar`, via `react-native-wgpu` +
|
|
58
|
+
react-three-fiber). No WebView, no postMessage latency, morphs driven on the GPU.
|
|
51
59
|
|
|
52
|
-
|
|
60
|
+
Capabilities differ slightly between the two — see the [capability matrix](#runtime-capability-matrix).
|
|
53
61
|
|
|
54
62
|
---
|
|
55
63
|
|
|
56
|
-
##
|
|
64
|
+
## Install
|
|
57
65
|
|
|
58
66
|
```bash
|
|
59
|
-
# React Native / Expo
|
|
67
|
+
# React Native / Expo WebView path
|
|
60
68
|
npm install talking-head-studio react-native-webview
|
|
61
69
|
|
|
70
|
+
# React Native / Expo native WebGPU path (in addition to talking-head-studio itself)
|
|
71
|
+
npx expo install react-native-wgpu @react-three/fiber three three-stdlib expo-asset
|
|
72
|
+
|
|
62
73
|
# Web (React, Next.js, Vite)
|
|
63
74
|
npm install talking-head-studio
|
|
64
75
|
```
|
|
65
76
|
|
|
77
|
+
`three`, `@react-three/fiber`, and the platform packages are peer dependencies — bring your
|
|
78
|
+
own versions. `react-native-webview` is only required for the WebView renderer. Native
|
|
79
|
+
WebGPU uses `react-native-wgpu` and must run in a native build, not Expo Go.
|
|
80
|
+
|
|
81
|
+
### React Native / Expo WebGPU setup
|
|
82
|
+
|
|
83
|
+
Native WebGPU needs the React Native new architecture and the WebGPU build of Three.js.
|
|
84
|
+
The example app in `example/` has the full working config; these are the important parts:
|
|
85
|
+
|
|
86
|
+
```jsonc
|
|
87
|
+
// app.json
|
|
88
|
+
{
|
|
89
|
+
"expo": {
|
|
90
|
+
"newArchEnabled": true,
|
|
91
|
+
"plugins": ["expo-asset"]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```js
|
|
97
|
+
// metro.config.js
|
|
98
|
+
const path = require('path');
|
|
99
|
+
const { getDefaultConfig } = require('expo/metro-config');
|
|
100
|
+
|
|
101
|
+
const config = getDefaultConfig(__dirname);
|
|
102
|
+
const nodeModules = path.resolve(__dirname, 'node_modules');
|
|
103
|
+
const threeWebgpu = path.resolve(nodeModules, 'three/build/three.webgpu.js');
|
|
104
|
+
|
|
105
|
+
config.resolver.assetExts.push('glb');
|
|
106
|
+
config.resolver.extraNodeModules = {
|
|
107
|
+
three: threeWebgpu,
|
|
108
|
+
};
|
|
109
|
+
|
|
110
|
+
module.exports = config;
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Build and launch a native app so `WebGPUModule` is linked:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
npx expo prebuild --platform android --no-install
|
|
117
|
+
npx expo run:android
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Expo Go cannot load the native WebGPU module.
|
|
121
|
+
|
|
66
122
|
---
|
|
67
123
|
|
|
68
124
|
## Quick start
|
|
69
125
|
|
|
126
|
+
### Web / React Native component
|
|
127
|
+
|
|
70
128
|
```tsx
|
|
71
129
|
import { useRef } from 'react';
|
|
72
130
|
import { TalkingHead, type TalkingHeadRef } from 'talking-head-studio';
|
|
@@ -80,99 +138,180 @@ export default function Avatar() {
|
|
|
80
138
|
avatarUrl="https://example.com/your-model.glb"
|
|
81
139
|
mood="happy"
|
|
82
140
|
cameraView="upper"
|
|
83
|
-
hairColor="#1a1a2e"
|
|
84
|
-
skinColor="#e0a370"
|
|
85
|
-
accessories={[{
|
|
86
|
-
id: 'sunglasses',
|
|
87
|
-
url: 'https://example.com/sunglasses.glb',
|
|
88
|
-
bone: 'Head',
|
|
89
|
-
position: [0, 0.08, 0.12],
|
|
90
|
-
rotation: [0, 0, 0],
|
|
91
|
-
scale: 1.0,
|
|
92
|
-
}]}
|
|
93
141
|
style={{ width: 400, height: 600 }}
|
|
94
|
-
onReady={() =>
|
|
142
|
+
onReady={() => {
|
|
143
|
+
// Drive the mouth from a viseme schedule (e.g. from your TTS server)
|
|
144
|
+
ref.current?.scheduleVisemes({
|
|
145
|
+
cues: [
|
|
146
|
+
{ viseme: 'A', startMs: 0, endMs: 90 },
|
|
147
|
+
{ viseme: 'E', startMs: 90, endMs: 170 },
|
|
148
|
+
{ viseme: 'X', startMs: 170, endMs: 220 },
|
|
149
|
+
],
|
|
150
|
+
durationMs: 220,
|
|
151
|
+
audioStartedAtMs: Date.now(),
|
|
152
|
+
});
|
|
153
|
+
}}
|
|
95
154
|
/>
|
|
96
155
|
);
|
|
97
156
|
}
|
|
98
157
|
```
|
|
99
158
|
|
|
159
|
+
### Native WebGPU (React Native, no WebView)
|
|
160
|
+
|
|
161
|
+
```tsx
|
|
162
|
+
import { WgpuAvatar, type WgpuAvatarRef } from 'talking-head-studio/wgpu';
|
|
163
|
+
|
|
164
|
+
const ref = useRef<WgpuAvatarRef>(null); // `useRef` is imported from 'react'
|
|
165
|
+
|
|
166
|
+
<WgpuAvatar
|
|
167
|
+
ref={ref}
|
|
168
|
+
avatarUrl="https://example.com/your-model.glb"
|
|
169
|
+
mood="neutral"
|
|
170
|
+
style={{ flex: 1 }}
|
|
171
|
+
/>;
|
|
172
|
+
// ref.current?.scheduleVisemes(payload) — same contract as the web component
|
|
173
|
+
```
|
|
174
|
+
|
|
100
175
|
---
|
|
101
176
|
|
|
102
|
-
##
|
|
177
|
+
## TalkingHead component — props & ref
|
|
103
178
|
|
|
104
|
-
|
|
179
|
+
### Props
|
|
180
|
+
|
|
181
|
+
| Prop | Type | Default | Description |
|
|
182
|
+
|------|------|---------|-------------|
|
|
183
|
+
| `avatarUrl` | `string` | required | Any `.glb`. Rigged or not. |
|
|
184
|
+
| `authToken` | `string \| null` | `null` | Bearer token for authenticated GLB URLs. |
|
|
185
|
+
| `mood` | `TalkingHeadMood` | `'neutral'` | `neutral \| happy \| sad \| angry \| fear \| disgust \| love \| sleep \| excited \| thinking \| concerned \| surprised` |
|
|
186
|
+
| `cameraView` | `'head' \| 'upper' \| 'full'` | `'upper'` | Framing preset. |
|
|
187
|
+
| `cameraDistance` | `number` | `-0.5` | Zoom offset. Negative = closer. |
|
|
188
|
+
| `hairColor` | `string` | — | Hex color. Applied to materials named `hair`, `fur`. |
|
|
189
|
+
| `skinColor` | `string` | — | Applied to `skin`, `body`, `face`. |
|
|
190
|
+
| `eyeColor` | `string` | — | Applied to `eye`, `iris`. |
|
|
191
|
+
| `accessories` | `TalkingHeadAccessory[]` | `[]` | Bone-attached GLB items. |
|
|
192
|
+
| `onReady` | `() => void` | — | Fired when fully loaded. |
|
|
193
|
+
| `onError` | `(msg: string) => void` | — | Fired on load failure. |
|
|
194
|
+
| `style` | `ViewStyle / CSSProperties` | — | Container style. |
|
|
195
|
+
|
|
196
|
+
### Ref methods
|
|
105
197
|
|
|
106
198
|
```ts
|
|
107
|
-
|
|
199
|
+
// Lip-sync
|
|
200
|
+
ref.current?.scheduleVisemes(payload); // AgentVisemePayload → full timed lip-sync schedule
|
|
201
|
+
ref.current?.clearVisemes();
|
|
202
|
+
ref.current?.sendAmplitude(0.7); // amplitude 0..1 → jaw (fallback / no schedule)
|
|
108
203
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
204
|
+
// Expression & appearance
|
|
205
|
+
ref.current?.setMood('excited');
|
|
206
|
+
ref.current?.setHairColor('#ff0000');
|
|
207
|
+
ref.current?.setSkinColor('#8d5524');
|
|
208
|
+
ref.current?.setEyeColor('#2e86de');
|
|
209
|
+
ref.current?.setAccessories([...]);
|
|
114
210
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
211
|
+
// Body — procedural motions, gestures, poses, animation clips
|
|
212
|
+
ref.current?.dispatchMotion('groove'); // looping procedural motion
|
|
213
|
+
ref.current?.stopMotion();
|
|
214
|
+
ref.current?.playGesture('thumbup'); // upstream hand gesture
|
|
215
|
+
ref.current?.playPose('oneknee'); // upstream pose template
|
|
216
|
+
ref.current?.playAnimation('/animations/wave.glb', { dur: 2 });
|
|
217
|
+
ref.current?.lookAt(120, 80, 500); // turn toward viewport coords
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
The motion vocabulary (`groove`, `wave`, `nod`, `idle`, `attack`, `defend`, `celebrate`,
|
|
221
|
+
plus every upstream gesture/pose name) is exported as typed constants —
|
|
222
|
+
`MOTION_KEYS`, `TALKINGHEAD_GESTURES`, `TALKINGHEAD_POSES`, and the `isMotionKey()` guard —
|
|
223
|
+
from both the package root and `talking-head-studio/contract`.
|
|
224
|
+
|
|
225
|
+
### Runtime capability matrix
|
|
226
|
+
|
|
227
|
+
Both renderers share one API; where native can't match the WebView's upstream rig, it
|
|
228
|
+
falls back to a procedural approximation rather than failing. This table is the honest gap
|
|
229
|
+
list.
|
|
230
|
+
|
|
231
|
+
| Feature | Web (iframe) | Native (WGPU) | Notes |
|
|
232
|
+
|---|:---:|:---:|---|
|
|
233
|
+
| Viseme schedules (`scheduleVisemes`) | ✅ | ✅ | Both consume `AgentVisemePayload`. |
|
|
234
|
+
| Amplitude jaw fallback (`sendAmplitude`) | ✅ | ⚠️ | Web drives jaw from amplitude; native exposes the method for API parity. |
|
|
235
|
+
| Core procedural motions (`groove`, `attack`, `defend`) | ✅ | ✅ | Shared `MOTION_DEFS` source of truth. |
|
|
236
|
+
| Gesture names (`thumbup`, `shrug`, …) | ✅ | ⚠️ | Web delegates to TalkingHead; native uses procedural approximations. |
|
|
237
|
+
| Pose names (`oneknee`, `kneel`, `sitting`, …) | ✅ | ⚠️ | Web delegates to TalkingHead; native uses static procedural poses. |
|
|
238
|
+
| Full mood vocabulary | ✅ | ✅ | All 8 upstream moods + friendly aliases. |
|
|
239
|
+
| External animation clips (`playAnimation`) | ✅ | ⚠️ | Web delegates to TalkingHead; native plays GLB clips via `AnimationMixer`. |
|
|
240
|
+
| Gaze (`lookAt`) | ✅ | ❌ | Native eye/head-gaze bridge is not available yet (in progress — see [Roadmap](#roadmap)). |
|
|
241
|
+
| Listening / mic-reactive mouth | ⚠️ | ❌ | Web can route host-provided audio; native bridge not implemented. |
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## Self-hosting the runtime assets
|
|
246
|
+
|
|
247
|
+
By default the web iframe pulls the TalkingHead rig, three.js, and the HeadAudio model
|
|
248
|
+
from public CDNs (jsDelivr, gstatic). To run fully self-hosted — no external CDN — vendor
|
|
249
|
+
those files and point the renderer at your own origin:
|
|
250
|
+
|
|
251
|
+
```ts
|
|
252
|
+
import { buildAvatarHtml } from 'talking-head-studio/html';
|
|
253
|
+
|
|
254
|
+
const html = buildAvatarHtml({
|
|
255
|
+
avatarUrl: 'https://your-cdn/model.glb',
|
|
256
|
+
vendorBaseUrl: 'https://your-cdn/vendor', // serves three.module.js, talkinghead.mjs, etc.
|
|
257
|
+
// ...
|
|
258
|
+
});
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
`vendorBaseUrl` replaces every CDN reference; `dracoDecoderUrl` overrides the DRACO decoder
|
|
262
|
+
location independently.
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## FaceControl — the lower-level contract
|
|
267
|
+
|
|
268
|
+
If you're writing a custom backend or a game-engine integration, `FaceControl` is the
|
|
269
|
+
single value that flows between a voice pipeline and any avatar backend.
|
|
270
|
+
|
|
271
|
+
```ts
|
|
272
|
+
import type { FaceControl, ExpressionState, HeadPose, EyeGaze } from 'talking-head-studio'; // shapes reproduced below for reference only — do not redeclare them
|
|
273
|
+
|
|
274
|
+
type HeadPose = { yaw: number; pitch: number; roll: number }; // each -1..1
|
|
275
|
+
type EyeGaze = { x: number; y: number }; // each -1..1
|
|
119
276
|
|
|
120
277
|
type ExpressionState = {
|
|
121
|
-
jawOpen:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
lowerLipDepress: number;
|
|
128
|
-
cheekRaise: number;
|
|
129
|
-
blinkLeft: number;
|
|
130
|
-
blinkRight: number;
|
|
131
|
-
browInnerUp: number;
|
|
132
|
-
browDownLeft: number;
|
|
133
|
-
browDownRight: number;
|
|
134
|
-
eyeGazeLeft: EyeGaze;
|
|
135
|
-
eyeGazeRight: EyeGaze;
|
|
136
|
-
};
|
|
278
|
+
jawOpen: number; mouthSmile: number; mouthFunnel: number; mouthPucker: number;
|
|
279
|
+
mouthWide: number; upperLipRaise: number; lowerLipDepress: number; cheekRaise: number;
|
|
280
|
+
blinkLeft: number; blinkRight: number; browInnerUp: number;
|
|
281
|
+
browDownLeft: number; browDownRight: number;
|
|
282
|
+
eyeGazeLeft: EyeGaze; eyeGazeRight: EyeGaze;
|
|
283
|
+
}; // all weights 0..1 unless noted
|
|
137
284
|
```
|
|
138
285
|
|
|
139
|
-
|
|
286
|
+
Drive it from a viseme schedule:
|
|
140
287
|
|
|
141
288
|
```ts
|
|
142
289
|
import { useFaceControlsFromVisemes } from 'talking-head-studio';
|
|
143
290
|
|
|
144
|
-
|
|
145
|
-
const faceControl = useFaceControlsFromVisemes(schedule);
|
|
146
|
-
// → { pose: { yaw:0, pitch:0, roll:0 }, expr: { jawOpen: 0.7, ... } }
|
|
291
|
+
const faceControl = useFaceControlsFromVisemes(schedule); // rAF-sampled FaceControl
|
|
147
292
|
```
|
|
148
293
|
|
|
149
|
-
|
|
294
|
+
Or implement a backend against it:
|
|
150
295
|
|
|
151
296
|
```ts
|
|
152
297
|
import type { AvatarBackend, AvatarRenderTarget, FaceControl } from 'talking-head-studio';
|
|
153
298
|
|
|
154
|
-
class
|
|
155
|
-
initialize() {
|
|
156
|
-
attach(target: AvatarRenderTarget) {
|
|
157
|
-
setControl(control: FaceControl) {
|
|
158
|
-
renderFrame() {
|
|
159
|
-
dispose() {
|
|
299
|
+
class MyBackend implements AvatarBackend {
|
|
300
|
+
initialize() {}
|
|
301
|
+
attach(target: AvatarRenderTarget) {}
|
|
302
|
+
setControl(control: FaceControl) {}
|
|
303
|
+
renderFrame() {}
|
|
304
|
+
dispose() {}
|
|
160
305
|
}
|
|
161
306
|
```
|
|
162
307
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
## MorphTargetBackend — Three.js GLB adapter
|
|
308
|
+
### MorphTargetBackend — the built-in Three.js adapter
|
|
166
309
|
|
|
167
|
-
The
|
|
310
|
+
The concrete `AvatarBackend` for GLB-with-morphs. Hand it a loaded scene (e.g. `gltf.scene` from Three.js `GLTFLoader`); it discovers
|
|
311
|
+
morph targets, builds a lookup cache, and drives them from `FaceControl`.
|
|
168
312
|
|
|
169
313
|
```ts
|
|
170
|
-
import
|
|
171
|
-
import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader';
|
|
172
|
-
import { MorphTargetBackend } from 'talking-head-studio';
|
|
173
|
-
|
|
174
|
-
const loader = new GLTFLoader();
|
|
175
|
-
const gltf = await loader.loadAsync('/avatar.glb');
|
|
314
|
+
import { MorphTargetBackend, createNeutralExpression } from 'talking-head-studio';
|
|
176
315
|
|
|
177
316
|
const backend = new MorphTargetBackend(gltf.scene, {
|
|
178
317
|
mood: 'neutral',
|
|
@@ -184,79 +323,27 @@ const backend = new MorphTargetBackend(gltf.scene, {
|
|
|
184
323
|
},
|
|
185
324
|
});
|
|
186
325
|
|
|
187
|
-
// Each frame:
|
|
188
326
|
backend.setControl(faceControl);
|
|
189
327
|
backend.renderFrame();
|
|
190
|
-
|
|
191
|
-
// Debug: what morphs does this model actually have?
|
|
192
|
-
console.log(backend.availableChannels);
|
|
193
|
-
// → { visemes: ['aa','PP','oh',...], expressions: ['jawOpen','blinkLeft',...], gaze: ['lookLeft','lookUp'] }
|
|
328
|
+
console.log(backend.availableChannels); // what this model actually supports
|
|
194
329
|
```
|
|
195
330
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
## ARKit → Oculus remap
|
|
199
|
-
|
|
200
|
-
Models with ARKit blend shapes (52 facial action units) but no Oculus viseme morphs can be remapped analytically — no ML, no FLAME, no artist work.
|
|
331
|
+
### ARKit → Oculus remap (no ML, no artist work)
|
|
201
332
|
|
|
202
333
|
```ts
|
|
203
334
|
import { remapArkitToOculus, getArkitWeightsForViseme } from 'talking-head-studio';
|
|
204
335
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
jawOpen: 0.7,
|
|
208
|
-
mouthLowerDownLeft: 0.4,
|
|
209
|
-
mouthLowerDownRight: 0.4,
|
|
210
|
-
});
|
|
211
|
-
// → { aa: 0.68, PP: 0.03, oh: 0.12, ... }
|
|
212
|
-
|
|
213
|
-
// Bake-time: get the ARKit recipe for a specific viseme
|
|
214
|
-
const recipe = getArkitWeightsForViseme('ou');
|
|
215
|
-
// → { mouthPucker: 0.9, mouthRollLower: 0.3 }
|
|
336
|
+
remapArkitToOculus({ jawOpen: 0.7, mouthLowerDownLeft: 0.4 }); // → { aa: 0.68, oh: 0.12, ... }
|
|
337
|
+
getArkitWeightsForViseme('ou'); // → { mouthPucker: 0.9, ... }
|
|
216
338
|
```
|
|
217
339
|
|
|
218
|
-
The full `ARKIT_TO_OCULUS` coefficient table is exported
|
|
219
|
-
|
|
220
|
-
---
|
|
221
|
-
|
|
222
|
-
## TalkingHead component — props & ref
|
|
223
|
-
|
|
224
|
-
### Props
|
|
225
|
-
|
|
226
|
-
| Prop | Type | Default | Description |
|
|
227
|
-
|------|------|---------|-------------|
|
|
228
|
-
| `avatarUrl` | `string` | required | Any `.glb`. Rigged or not. |
|
|
229
|
-
| `authToken` | `string \| null` | `null` | Bearer token for authenticated GLB URLs. |
|
|
230
|
-
| `mood` | `TalkingHeadMood` | `'neutral'` | `neutral \| happy \| sad \| angry \| excited \| thinking \| concerned \| surprised` |
|
|
231
|
-
| `cameraView` | `'head' \| 'upper' \| 'full'` | `'upper'` | Framing preset. |
|
|
232
|
-
| `cameraDistance` | `number` | `-0.5` | Zoom offset. Negative = closer. |
|
|
233
|
-
| `hairColor` | `string` | — | Hex color. Applied to materials named `hair`, `fur`. |
|
|
234
|
-
| `skinColor` | `string` | — | Applied to `skin`, `body`, `face`. |
|
|
235
|
-
| `eyeColor` | `string` | — | Applied to `eye`, `iris`. |
|
|
236
|
-
| `accessories` | `TalkingHeadAccessory[]` | `[]` | Bone-attached GLB items. |
|
|
237
|
-
| `onReady` | `() => void` | — | Fired when fully loaded. |
|
|
238
|
-
| `onError` | `(msg: string) => void` | — | Fired on load failure. |
|
|
239
|
-
| `style` | `ViewStyle / CSSProperties` | — | Container style. |
|
|
240
|
-
|
|
241
|
-
### Ref methods
|
|
242
|
-
|
|
243
|
-
```ts
|
|
244
|
-
ref.current?.sendAmplitude(0.7); // amplitude 0..1 → jaw
|
|
245
|
-
ref.current?.scheduleVisemes(payload); // AgentVisemePayload → full lip-sync schedule
|
|
246
|
-
ref.current?.clearVisemes();
|
|
247
|
-
ref.current?.setMood('excited');
|
|
248
|
-
ref.current?.setHairColor('#ff0000');
|
|
249
|
-
ref.current?.setSkinColor('#8d5524');
|
|
250
|
-
ref.current?.setEyeColor('#2e86de');
|
|
251
|
-
ref.current?.setAccessories([...]);
|
|
252
|
-
ref.current?.dispatchMotion('nod');
|
|
253
|
-
```
|
|
340
|
+
The full `ARKIT_TO_OCULUS` coefficient table is exported for building your own bake pipeline.
|
|
254
341
|
|
|
255
342
|
---
|
|
256
343
|
|
|
257
344
|
## Accessories
|
|
258
345
|
|
|
259
|
-
Any GLB attached to any skeleton bone
|
|
346
|
+
Any GLB attached to any skeleton bone, placeable at runtime.
|
|
260
347
|
|
|
261
348
|
```ts
|
|
262
349
|
interface TalkingHeadAccessory {
|
|
@@ -269,59 +356,56 @@ interface TalkingHeadAccessory {
|
|
|
269
356
|
}
|
|
270
357
|
```
|
|
271
358
|
|
|
272
|
-
Common Mixamo bones: `Head, Neck, Spine, Spine1, Spine2, LeftHand, RightHand, LeftFoot, RightFoot, Hips
|
|
273
|
-
|
|
274
|
-
The 3D editor (`talking-head-studio/editor`) provides a gizmo for live placement with front/top/side views. LLM-assisted placement is available via the companion backend.
|
|
359
|
+
Common Mixamo bones: `Head, Neck, Spine, Spine1, Spine2, LeftHand, RightHand, LeftFoot, RightFoot, Hips`.
|
|
360
|
+
The 3D editor (`talking-head-studio/editor`, web only) provides a gizmo for live placement.
|
|
275
361
|
|
|
276
362
|
---
|
|
277
363
|
|
|
278
|
-
##
|
|
364
|
+
## Subpath exports
|
|
279
365
|
|
|
280
|
-
|
|
|
366
|
+
| Import | Description |
|
|
281
367
|
|------|-------------|
|
|
282
|
-
| `talking-head-studio` |
|
|
283
|
-
| `talking-head-studio/
|
|
368
|
+
| `talking-head-studio` | Avatar component + `FaceControl` contracts + motion constants |
|
|
369
|
+
| `talking-head-studio/contract` | Stable contract entrypoint — viseme, FaceControl, backend, and motion types, plus the motion constants and `isMotionKey()` guard |
|
|
370
|
+
| `talking-head-studio/html` | `buildAvatarHtml()` for self-hosted / custom iframe embedding |
|
|
371
|
+
| `talking-head-studio/wgpu` | React Native WebGPU renderer (`WgpuAvatar`) |
|
|
372
|
+
| `talking-head-studio/editor` | R3F 3D editor with placement gizmo (web only) |
|
|
284
373
|
| `talking-head-studio/appearance` | Material color system for any GLB |
|
|
285
374
|
| `talking-head-studio/voice` | Audio recording + WAV conversion hooks |
|
|
286
375
|
| `talking-head-studio/sketchfab` | Sketchfab search + download hooks |
|
|
287
376
|
| `talking-head-studio/api` | Studio API client (avatar CRUD, voice profiles) |
|
|
288
377
|
| `talking-head-studio/wardrobe` | Accessory + outfit state management |
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
378
|
+
|
|
379
|
+
Workspace packages (`packages/avatar-creator`, `packages/agent-avatar`) ship an embeddable
|
|
380
|
+
creator widget and a LiveKit + MCP agent integration.
|
|
292
381
|
|
|
293
382
|
---
|
|
294
383
|
|
|
295
384
|
## Roadmap
|
|
296
385
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
- `
|
|
301
|
-
-
|
|
302
|
-
-
|
|
303
|
-
- `AgentVisemePayload`
|
|
304
|
-
-
|
|
305
|
-
-
|
|
306
|
-
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
-
|
|
318
|
-
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
### Longer term
|
|
322
|
-
- Avatar marketplace — `CatalogItem`, `AvatarAsset`, `RarityLevel` types are already defined. Backend + web store + in-creator purchasing.
|
|
323
|
-
- RPM migration tools — import existing RPM avatars where technically possible.
|
|
324
|
-
- SLA + deprecation policy — for teams that need a reliability guarantee as they move off RPM.
|
|
386
|
+
> **Status legend:** ✅ shipped · 🔜 in progress · 🧪 designed, not yet built
|
|
387
|
+
|
|
388
|
+
**Shipped today**
|
|
389
|
+
- ✅ `FaceControl` face-control space (pose + expression + gaze) and `AvatarBackend` interface
|
|
390
|
+
- ✅ `MorphTargetBackend` — GLB morph discovery + mood layering
|
|
391
|
+
- ✅ ARKit → Oculus analytical remap with full coefficient table
|
|
392
|
+
- ✅ `AgentVisemePayload` viseme schedule format + `scheduleVisemes` on both renderers
|
|
393
|
+
- ✅ Shared procedural motion engine (web + native WGPU), gestures, poses, animation clips
|
|
394
|
+
- ✅ Self-hosting via `buildAvatarHtml({ vendorBaseUrl })`
|
|
395
|
+
- ✅ `packages/avatar-creator`, `packages/agent-avatar`
|
|
396
|
+
|
|
397
|
+
**In progress**
|
|
398
|
+
- 🔜 Native (WGPU) gaze bridge (`lookAt`) and mic-reactive listening
|
|
399
|
+
- 🔜 GLB schema walker — report morph coverage, bones, LODs, viseme tier for any model
|
|
400
|
+
|
|
401
|
+
**Designed, not yet built**
|
|
402
|
+
- 🧪 `GaussianBackend` — Gaussian-splat renderer + FLAME per-viseme delta transfer, so a
|
|
403
|
+
model with *no* face rig still gets excellent lip-sync. This is the zero-prerequisite path.
|
|
404
|
+
- 🧪 FLAME viseme transfer pipeline (companion backend) — bake Oculus visemes into a GLB
|
|
405
|
+
that lacks them
|
|
406
|
+
- 🧪 Unity / Unreal SDKs implementing the same `AvatarBackend` contract
|
|
407
|
+
- 🧪 Avatar marketplace + RPM import tooling (`CatalogItem` / `AvatarAsset` types exist;
|
|
408
|
+
backend and store do not)
|
|
325
409
|
|
|
326
410
|
---
|
|
327
411
|
|
|
@@ -331,14 +415,16 @@ The 3D editor (`talking-head-studio/editor`) provides a gizmo for live placement
|
|
|
331
415
|
git clone https://github.com/sitebay/talking-head-studio.git
|
|
332
416
|
cd talking-head-studio
|
|
333
417
|
npm install
|
|
334
|
-
npm run typecheck # must be clean
|
|
418
|
+
npm run typecheck # must be clean
|
|
335
419
|
npm test
|
|
336
420
|
```
|
|
337
421
|
|
|
338
|
-
|
|
422
|
+
Monorepo with `packages/*` as npm workspaces; the main library is the root package. The
|
|
423
|
+
publish gate (`prepublishOnly`) runs lint, typecheck, tests, and metadata checks.
|
|
339
424
|
|
|
340
425
|
---
|
|
341
426
|
|
|
342
|
-
##
|
|
427
|
+
## Credits & license
|
|
343
428
|
|
|
344
|
-
|
|
429
|
+
Built on [met4citizen/TalkingHead](https://github.com/met4citizen/TalkingHead) (rig +
|
|
430
|
+
gestures/poses on the web path) and [Three.js](https://threejs.org). MIT licensed.
|
package/dist/TalkingHead.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import React from 'react';
|
|
2
2
|
import { type StyleProp, type ViewStyle } from 'react-native';
|
|
3
|
-
export type TalkingHeadMood = 'neutral' | 'happy' | 'sad' | 'angry' | 'excited' | 'thinking' | 'concerned' | 'surprised';
|
|
3
|
+
export type TalkingHeadMood = 'neutral' | 'happy' | 'sad' | 'angry' | 'fear' | 'disgust' | 'love' | 'sleep' | 'excited' | 'thinking' | 'concerned' | 'surprised';
|
|
4
4
|
import type { AgentVisemePayload, OculusViseme, VisemeCue } from './core/avatar/visemes';
|
|
5
|
+
import type { MotionKey, TalkingHeadGesture, TalkingHeadPose } from './core/avatar/motion';
|
|
5
6
|
export type TalkingHeadLoadingStage = 'booting' | 'fetching_model' | 'loading_avatar' | 'loading_fallback' | 'ready';
|
|
6
7
|
export interface TalkingHeadLoadingState {
|
|
7
8
|
stage: TalkingHeadLoadingStage;
|
|
@@ -88,8 +89,32 @@ export interface TalkingHeadRef {
|
|
|
88
89
|
setSkinColor: (color: string) => void;
|
|
89
90
|
setEyeColor: (color: string) => void;
|
|
90
91
|
setAccessories: (accessories: TalkingHeadAccessory[]) => void;
|
|
91
|
-
/**
|
|
92
|
-
dispatchMotion
|
|
92
|
+
/** Play a procedural motion (e.g. 'attack', 'defend', 'groove'). */
|
|
93
|
+
dispatchMotion(name: MotionKey): void;
|
|
94
|
+
dispatchMotion(name: string): void;
|
|
95
|
+
/** Stop the current procedural motion and return to rest. */
|
|
96
|
+
stopMotion: () => void;
|
|
97
|
+
/** Play an upstream TalkingHead hand gesture (e.g. 'thumbup'). */
|
|
98
|
+
playGesture: (name: TalkingHeadGesture | string, opts?: {
|
|
99
|
+
dur?: number;
|
|
100
|
+
mirror?: boolean;
|
|
101
|
+
ms?: number;
|
|
102
|
+
}) => void;
|
|
103
|
+
/** Stop the current gesture, easing out over `ms`. */
|
|
104
|
+
stopGesture: (ms?: number) => void;
|
|
105
|
+
/** Strike a pose — a built-in template name (e.g. 'oneknee') or a pose-file URL. */
|
|
106
|
+
playPose: (urlOrTemplate: TalkingHeadPose | string, dur?: number) => void;
|
|
107
|
+
/** Release the current pose and return to the default stance. */
|
|
108
|
+
stopPose: () => void;
|
|
109
|
+
/** Play a full body animation from a GLB/FBX URL (e.g. a combat move). */
|
|
110
|
+
playAnimation: (url: string, opts?: {
|
|
111
|
+
dur?: number;
|
|
112
|
+
index?: number;
|
|
113
|
+
}) => void;
|
|
114
|
+
/** Stop the current body animation. */
|
|
115
|
+
stopAnimation: () => void;
|
|
116
|
+
/** Turn head/eyes toward viewport coordinates (px), easing over `ms`. */
|
|
117
|
+
lookAt: (x: number, y: number, ms?: number) => void;
|
|
93
118
|
}
|
|
94
119
|
/** @deprecated Use AvatarPlayerRef */
|
|
95
120
|
export type TalkingHeadRefAlias = TalkingHeadRef;
|