talking-head-studio 0.4.11 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. package/README.md +279 -193
  2. package/dist/TalkingHead.d.ts +28 -3
  3. package/dist/TalkingHead.js +21 -2
  4. package/dist/TalkingHead.web.d.ts +31 -4
  5. package/dist/TalkingHead.web.js +11 -1
  6. package/dist/TalkingHeadVisualization.d.ts +22 -0
  7. package/dist/TalkingHeadVisualization.js +30 -10
  8. package/dist/api/studioApi.d.ts +12 -1
  9. package/dist/api/studioApi.js +16 -2
  10. package/dist/contract.d.ts +14 -0
  11. package/dist/contract.js +30 -0
  12. package/dist/core/avatar/avatarCapabilities.d.ts +60 -0
  13. package/dist/core/avatar/avatarCapabilities.js +100 -0
  14. package/dist/core/avatar/backends/gaussian.js +6 -4
  15. package/dist/core/avatar/motion.d.ts +1713 -0
  16. package/dist/core/avatar/motion.js +550 -0
  17. package/dist/core/avatar/motionRuntime.d.ts +46 -0
  18. package/dist/core/avatar/motionRuntime.js +84 -0
  19. package/dist/core/avatar/schema.d.ts +33 -5
  20. package/dist/core/avatar/visemes.d.ts +16 -1
  21. package/dist/core/avatar/visemes.js +48 -1
  22. package/dist/editor/AvatarCanvas.js +92 -1
  23. package/dist/editor/AvatarEditor.native.js +1 -0
  24. package/dist/editor/AvatarModel.js +1 -0
  25. package/dist/editor/FaceSqueezeEditor.d.ts +3 -1
  26. package/dist/editor/FaceSqueezeEditor.js +176 -112
  27. package/dist/editor/FaceSqueezeEditor.web.d.ts +3 -1
  28. package/dist/editor/FaceSqueezeEditor.web.js +30 -28
  29. package/dist/editor/RigidAccessory.js +17 -2
  30. package/dist/editor/SkinnedClothing.js +1 -0
  31. package/dist/editor/boneLockedDrag.d.ts +11 -0
  32. package/dist/editor/boneLockedDrag.js +68 -0
  33. package/dist/editor/boneSnap.web.d.ts +27 -0
  34. package/dist/editor/boneSnap.web.js +99 -0
  35. package/dist/editor/index.web.d.ts +10 -0
  36. package/dist/editor/index.web.js +26 -0
  37. package/dist/editor/sounds/haha.wav +0 -0
  38. package/dist/editor/sounds/owie.wav +0 -0
  39. package/dist/editor/sounds/stop.wav +0 -0
  40. package/dist/editor/studioTheme.d.ts +14 -14
  41. package/dist/editor/studioTheme.js +17 -14
  42. package/dist/editor/types.d.ts +1 -0
  43. package/dist/html/accessories.d.ts +7 -0
  44. package/dist/html/accessories.js +149 -0
  45. package/dist/html/motion.d.ts +1 -0
  46. package/dist/html/motion.js +189 -0
  47. package/dist/html/visemes.d.ts +7 -0
  48. package/dist/html/visemes.js +348 -0
  49. package/dist/html.d.ts +1 -1
  50. package/dist/html.js +55 -732
  51. package/dist/index.d.ts +7 -3
  52. package/dist/index.js +17 -1
  53. package/dist/index.web.d.ts +18 -1
  54. package/dist/index.web.js +36 -3
  55. package/dist/sketchfab/api.js +1 -0
  56. package/dist/sketchfab/glbInspect.d.ts +22 -0
  57. package/dist/sketchfab/glbInspect.js +58 -0
  58. package/dist/sketchfab/index.d.ts +3 -0
  59. package/dist/sketchfab/index.js +8 -1
  60. package/dist/sketchfab/inspectRemote.d.ts +13 -0
  61. package/dist/sketchfab/inspectRemote.js +77 -0
  62. package/dist/sketchfab/types.d.ts +10 -0
  63. package/dist/studio/AccessoryBrowserScreen.d.ts +6 -0
  64. package/dist/studio/AccessoryBrowserScreen.js +626 -0
  65. package/dist/studio/AccessoryPanel.d.ts +10 -0
  66. package/dist/studio/AccessoryPanel.js +396 -0
  67. package/dist/studio/AppearancePanel.d.ts +9 -0
  68. package/dist/studio/AppearancePanel.js +77 -0
  69. package/dist/studio/AvatarCreatorScreen.d.ts +5 -0
  70. package/dist/studio/AvatarCreatorScreen.js +806 -0
  71. package/dist/studio/AvatarEditorScreen.d.ts +14 -0
  72. package/dist/studio/AvatarEditorScreen.js +510 -0
  73. package/dist/studio/AvatarGrid.d.ts +23 -0
  74. package/dist/studio/AvatarGrid.js +257 -0
  75. package/dist/studio/ColorSwatch.d.ts +8 -0
  76. package/dist/studio/ColorSwatch.js +100 -0
  77. package/dist/studio/CreateVoiceProfileSheet.d.ts +8 -0
  78. package/dist/studio/CreateVoiceProfileSheet.js +242 -0
  79. package/dist/studio/DetailsPanel.d.ts +15 -0
  80. package/dist/studio/DetailsPanel.js +239 -0
  81. package/dist/studio/FilamentEditor.d.ts +2 -0
  82. package/dist/studio/FilamentEditor.js +6 -0
  83. package/dist/studio/PrecisionPanel.d.ts +2 -0
  84. package/dist/studio/PrecisionPanel.js +7 -0
  85. package/dist/studio/PublicGalleryScreen.d.ts +5 -0
  86. package/dist/studio/PublicGalleryScreen.js +358 -0
  87. package/dist/studio/SketchfabModelCard.d.ts +20 -0
  88. package/dist/studio/SketchfabModelCard.js +104 -0
  89. package/dist/studio/StudioBrowseHeader.d.ts +9 -0
  90. package/dist/studio/StudioBrowseHeader.js +28 -0
  91. package/dist/studio/StudioEmptyState.d.ts +8 -0
  92. package/dist/studio/StudioEmptyState.js +29 -0
  93. package/dist/studio/StudioFloatingAction.d.ts +13 -0
  94. package/dist/studio/StudioFloatingAction.js +42 -0
  95. package/dist/studio/StudioSectionHeader.d.ts +7 -0
  96. package/dist/studio/StudioSectionHeader.js +27 -0
  97. package/dist/studio/StudioSurfaceCard.d.ts +8 -0
  98. package/dist/studio/StudioSurfaceCard.js +20 -0
  99. package/dist/studio/VoicePanel.d.ts +15 -0
  100. package/dist/studio/VoicePanel.js +305 -0
  101. package/dist/studio/constants.d.ts +3 -0
  102. package/dist/studio/constants.js +6 -0
  103. package/dist/studio/index.d.ts +29 -0
  104. package/dist/studio/index.js +54 -0
  105. package/dist/studio/useSketchfabCapabilities.d.ts +31 -0
  106. package/dist/studio/useSketchfabCapabilities.js +82 -0
  107. package/dist/tts/useDirectVisemeStream.js +15 -10
  108. package/dist/utils/avatarUtils.js +92 -5
  109. package/dist/utils/faceLandmarkerToShapeWeights.js +2 -4
  110. package/dist/voice/useAudioPlayer.js +17 -4
  111. package/dist/voice/useVoicePreview.js +4 -2
  112. package/dist/wardrobe/index.d.ts +1 -0
  113. package/dist/wardrobe/index.js +6 -1
  114. package/dist/wardrobe/useAccessoryGestures.d.ts +20 -0
  115. package/dist/wardrobe/useAccessoryGestures.js +94 -0
  116. package/dist/wardrobe/useAvatarWardrobeHydration.js +8 -2
  117. package/dist/wardrobe/useStudioAvatar.js +11 -2
  118. package/dist/wardrobe/wardrobeStore.d.ts +2 -0
  119. package/dist/wardrobe/wardrobeStore.js +12 -2
  120. package/dist/wgpu/R3FWebGpuCanvas.d.ts +15 -0
  121. package/dist/wgpu/R3FWebGpuCanvas.js +176 -0
  122. package/dist/wgpu/WgpuAvatar.d.ts +26 -2
  123. package/dist/wgpu/WgpuAvatar.js +296 -39
  124. package/dist/wgpu/accessoryDefaults.d.ts +12 -0
  125. package/dist/wgpu/accessoryDefaults.js +19 -0
  126. package/dist/wgpu/blobShim.d.ts +2 -0
  127. package/dist/wgpu/blobShim.js +191 -0
  128. package/dist/wgpu/index.d.ts +1 -0
  129. package/dist/wgpu/index.js +4 -1
  130. package/dist/wgpu/loadGLTFFromUri.d.ts +2 -0
  131. package/dist/wgpu/loadGLTFFromUri.js +75 -0
  132. package/dist/wgpu/morphTables.js +21 -10
  133. package/dist/wgpu/motionState.d.ts +20 -0
  134. package/dist/wgpu/motionState.js +31 -0
  135. package/dist/wgpu/patchThreeForRN.d.ts +28 -0
  136. package/dist/wgpu/patchThreeForRN.js +292 -0
  137. package/dist/wgpu/scenePlacement.d.ts +5 -0
  138. package/dist/wgpu/scenePlacement.js +50 -0
  139. package/dist/wgpu/useAuthedModelUri.js +4 -2
  140. package/dist/wgpu/useNativeGLTF.d.ts +7 -0
  141. package/dist/wgpu/useNativeGLTF.js +36 -0
  142. package/package.json +97 -31
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # talking-head-studio
2
2
 
3
- **Open-source avatar platform for Web, React Native, Unity, and Unreal. Any GLB model. Full lip-sync with or without blend shapes.**
3
+ **Make any GLB model talk on the web and on React Native with phoneme-accurate, audio-aligned lip-sync. With or without blend shapes.**
4
4
 
5
5
  [![npm version](https://img.shields.io/npm/v/talking-head-studio.svg)](https://www.npmjs.com/package/talking-head-studio)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
@@ -8,65 +8,123 @@
8
8
 
9
9
  ---
10
10
 
11
- ## What this is
11
+ ## The point: lip-sync that's driven by the audio, not guessed
12
12
 
13
- A drop-in avatar runtime and platform SDK built to be a self-hostable replacement for Ready Player Me. The core problem it solves: **any arbitrary 3D model should be able to talk, emote, and respond to a voice pipeline** — regardless of whether the artist baked in blend shapes, visemes, or any face rig at all.
13
+ Most avatar libraries flap a jaw open in proportion to audio loudness. That reads as
14
+ "mouth moving," not "speaking." talking-head-studio is built around a different model: a
15
+ **viseme schedule** — a timed list of mouth shapes derived from the actual synthesized
16
+ speech — drives morph targets on the model.
14
17
 
15
- The library ships a renderer (web iframe + React Native wgpu), a backend-agnostic face control contract, and a growing set of adapters that map TTS/audio/AI output onto whatever rendering mechanism the model actually supports.
18
+ ```
19
+ TTS server ──▶ AgentVisemePayload ──▶ scheduleVisemes() ──▶ morph drive
20
+ (word-aligned { cues: [{ viseme, startMs, (this library, (Three.js
21
+ phonemes) endMs }], durationMs } web + native) morph targets)
22
+ ```
16
23
 
17
- ---
24
+ The wire format is `AgentVisemePayload`: per-phoneme cues using the 9-shape Rhubarb
25
+ vocabulary (`A`–`H`, `X`), each with a start/end time in milliseconds. The library maps
26
+ those onto Oculus viseme morphs and schedules them against the audio clock, so the mouth
27
+ hits each shape *when that sound is actually heard*.
28
+
29
+ This pairs directly with a TTS server that emits viseme timings from real word alignment
30
+ (we built [Qwen3-TTS](https://github.com/sitebay/Qwen3-TTS) for exactly this — it serves
31
+ `AgentVisemePayload` over an SSE endpoint). But the format is open: emit cues from any
32
+ source and the renderer consumes them identically.
18
33
 
19
- ## Lip-sync tiers (any model works)
34
+ ### Four lip-sync tiers — every model works
20
35
 
21
- | Model type | Lip-sync method | Quality |
36
+ The model decides the fidelity; you don't have to pre-process anything.
37
+
38
+ | Your model has… | Method | Quality |
22
39
  |---|---|---|
23
- | GLB with Oculus viseme morphs | Direct morph drive via `MorphTargetBackend` | Excellent |
24
- | GLB with ARKit blend shapes | `remapArkitToOculus()` → morph drive | Good |
25
- | GLB with only `jawOpen` / `mouthOpen` | Amplitude fallback | Acceptable |
26
- | Any other GLB | Gaussian splat backend *(roadmap)* | Excellent |
40
+ | Oculus viseme morphs | Direct morph drive (`MorphTargetBackend`) | Excellent |
41
+ | ARKit blend shapes (52 AUs) | `remapArkitToOculus()` → morph drive | Good |
42
+ | Only `jawOpen` / `mouthOpen` | Amplitude fallback | Acceptable |
43
+ | No face rig at all | Gaussian splat backend *(roadmap — not yet built)* | Excellent |
27
44
 
28
- The last row is the goal: **scan any model into a Gaussian representation, generate per-viseme deltas via FLAME-based transfer, and drive it from the same `FaceControl` contract everything else uses.** No blend shapes required. No artist work required.
45
+ If a model has no viseme morphs, scheduled cues still fall back to the jaw/amplitude path
46
+ automatically — you never get a frozen face.
29
47
 
30
48
  ---
31
49
 
32
- ## Architecture
50
+ ## Two renderers, one contract
33
51
 
34
- ```
35
- TTS / audio / face tracking
36
-
37
- AgentVisemePayload ← canonical wire format for lip-sync schedules
38
-
39
- FaceControl ← pose (HeadPose) + expression (ExpressionState) + gaze (EyeGaze)
40
-
41
- AvatarBackend ←────────────── swap without changing anything upstream
42
- ├── MorphTargetBackend ← Three.js morph targets (GLB with blend shapes)
43
- ├── GaussianBackend ← [roadmap] Gaussian splat + FLAME delta transfer
44
- └── (your backend) ← implement AvatarBackend, plug in
45
-
46
- Renderer
47
- ├── Web iframe ← TalkingHead.web.tsx (any React app)
48
- ├── React Native wgpu ← WgpuAvatar (native GPU, no WebView latency)
49
- └── Unity / Unreal ← [roadmap] SDK plugins consuming same contracts
50
- ```
52
+ The same `AgentVisemePayload` / `FaceControl` contract drives both render paths, so you
53
+ write your voice pipeline once:
54
+
55
+ - **Web** — an isolated `<iframe>` running [met4citizen TalkingHead](https://github.com/met4citizen/TalkingHead)
56
+ as the rig (`TalkingHead.web.tsx`). Drop it into any React / Next / Vite app.
57
+ - **React Native** — a native WebGPU renderer (`WgpuAvatar`, via `react-native-wgpu` +
58
+ react-three-fiber). No WebView, no postMessage latency, morphs driven on the GPU.
51
59
 
52
- Everything above `AvatarBackend` is renderer-agnostic. Everything above `FaceControl` is model-agnostic.
60
+ Capabilities differ slightly between the two — see the [capability matrix](#runtime-capability-matrix).
53
61
 
54
62
  ---
55
63
 
56
- ## Installation
64
+ ## Install
57
65
 
58
66
  ```bash
59
- # React Native / Expo
67
+ # React Native / Expo — WebView path
60
68
  npm install talking-head-studio react-native-webview
61
69
 
70
+ # React Native / Expo — native WebGPU path
71
+ npx expo install react-native-wgpu @react-three/fiber three three-stdlib expo-asset
72
+
62
73
  # Web (React, Next.js, Vite)
63
74
  npm install talking-head-studio
64
75
  ```
65
76
 
77
+ `three`, `@react-three/fiber`, and the platform packages are peer dependencies — bring your
78
+ own versions. `react-native-webview` is only required for the WebView renderer. Native
79
+ WebGPU uses `react-native-wgpu` and must run in a native build, not Expo Go.
80
+
81
+ ### React Native / Expo WebGPU setup
82
+
83
+ Native WebGPU needs the React Native new architecture and the WebGPU build of Three.js.
84
+ The example app in `example/` has the full working config; these are the important parts:
85
+
86
+ ```jsonc
87
+ // app.json
88
+ {
89
+ "expo": {
90
+ "newArchEnabled": true,
91
+ "plugins": ["expo-asset"]
92
+ }
93
+ }
94
+ ```
95
+
96
+ ```js
97
+ // metro.config.js
98
+ const path = require('path');
99
+ const { getDefaultConfig } = require('expo/metro-config');
100
+
101
+ const config = getDefaultConfig(__dirname);
102
+ const nodeModules = path.resolve(__dirname, 'node_modules');
103
+ const threeWebgpu = path.resolve(nodeModules, 'three/build/three.webgpu.js');
104
+
105
+ config.resolver.assetExts.push('glb');
106
+ config.resolver.extraNodeModules = {
107
+ three: threeWebgpu,
108
+ };
109
+
110
+ module.exports = config;
111
+ ```
112
+
113
+ Build and launch a native app so `WebGPUModule` is linked:
114
+
115
+ ```bash
116
+ npx expo prebuild --platform android --no-install
117
+ npx expo run:android
118
+ ```
119
+
120
+ Expo Go cannot load the native WebGPU module.
121
+
66
122
  ---
67
123
 
68
124
  ## Quick start
69
125
 
126
+ ### Web / React Native component
127
+
70
128
  ```tsx
71
129
  import { useRef } from 'react';
72
130
  import { TalkingHead, type TalkingHeadRef } from 'talking-head-studio';
@@ -80,99 +138,180 @@ export default function Avatar() {
80
138
  avatarUrl="https://example.com/your-model.glb"
81
139
  mood="happy"
82
140
  cameraView="upper"
83
- hairColor="#1a1a2e"
84
- skinColor="#e0a370"
85
- accessories={[{
86
- id: 'sunglasses',
87
- url: 'https://example.com/sunglasses.glb',
88
- bone: 'Head',
89
- position: [0, 0.08, 0.12],
90
- rotation: [0, 0, 0],
91
- scale: 1.0,
92
- }]}
93
141
  style={{ width: 400, height: 600 }}
94
- onReady={() => console.log('ready')}
142
+ onReady={() => {
143
+ // Drive the mouth from a viseme schedule (e.g. from your TTS server)
144
+ ref.current?.scheduleVisemes({
145
+ cues: [
146
+ { viseme: 'A', startMs: 0, endMs: 90 },
147
+ { viseme: 'E', startMs: 90, endMs: 170 },
148
+ { viseme: 'X', startMs: 170, endMs: 220 },
149
+ ],
150
+ durationMs: 220,
151
+ audioStartedAtMs: Date.now(),
152
+ });
153
+ }}
95
154
  />
96
155
  );
97
156
  }
98
157
  ```
99
158
 
159
+ ### Native WebGPU (React Native, no WebView)
160
+
161
+ ```tsx
162
+ import { WgpuAvatar, type WgpuAvatarRef } from 'talking-head-studio/wgpu';
163
+
164
+ const ref = useRef<WgpuAvatarRef>(null);
165
+
166
+ <WgpuAvatar
167
+ ref={ref}
168
+ avatarUrl="https://example.com/your-model.glb"
169
+ mood="neutral"
170
+ style={{ flex: 1 }}
171
+ />;
172
+ // ref.current?.scheduleVisemes(payload) — same contract as the web component
173
+ ```
174
+
100
175
  ---
101
176
 
102
- ## FaceControl — the core contract
177
+ ## TalkingHead component props & ref
103
178
 
104
- The `FaceControl` type is the single value that flows between your voice pipeline and any avatar backend. If you're building a custom backend or integrating with a game engine, this is what you implement against.
179
+ ### Props
180
+
181
+ | Prop | Type | Default | Description |
182
+ |------|------|---------|-------------|
183
+ | `avatarUrl` | `string` | required | Any `.glb`. Rigged or not. |
184
+ | `authToken` | `string \| null` | `null` | Bearer token for authenticated GLB URLs. |
185
+ | `mood` | `TalkingHeadMood` | `'neutral'` | `neutral \| happy \| sad \| angry \| fear \| disgust \| love \| sleep \| excited \| thinking \| concerned \| surprised` |
186
+ | `cameraView` | `'head' \| 'upper' \| 'full'` | `'upper'` | Framing preset. |
187
+ | `cameraDistance` | `number` | `-0.5` | Zoom offset. Negative = closer. |
188
+ | `hairColor` | `string` | — | Hex color. Applied to materials named `hair`, `fur`. |
189
+ | `skinColor` | `string` | — | Applied to `skin`, `body`, `face`. |
190
+ | `eyeColor` | `string` | — | Applied to `eye`, `iris`. |
191
+ | `accessories` | `TalkingHeadAccessory[]` | `[]` | Bone-attached GLB items. |
192
+ | `onReady` | `() => void` | — | Fired when fully loaded. |
193
+ | `onError` | `(msg: string) => void` | — | Fired on load failure. |
194
+ | `style` | `ViewStyle / CSSProperties` | — | Container style. |
195
+
196
+ ### Ref methods
105
197
 
106
198
  ```ts
107
- import type { FaceControl, ExpressionState, HeadPose, EyeGaze } from 'talking-head-studio';
199
+ // Lip-sync
200
+ ref.current?.scheduleVisemes(payload); // AgentVisemePayload → full timed lip-sync schedule
201
+ ref.current?.clearVisemes();
202
+ ref.current?.sendAmplitude(0.7); // amplitude 0..1 → jaw (fallback / no schedule)
108
203
 
109
- type HeadPose = {
110
- yaw: number; // -1..1, left..right
111
- pitch: number; // -1..1, down..up
112
- roll: number; // -1..1, tilt
113
- };
204
+ // Expression & appearance
205
+ ref.current?.setMood('excited');
206
+ ref.current?.setHairColor('#ff0000');
207
+ ref.current?.setSkinColor('#8d5524');
208
+ ref.current?.setEyeColor('#2e86de');
209
+ ref.current?.setAccessories([...]);
114
210
 
115
- type EyeGaze = {
116
- x: number; // -1..1, left..right
117
- y: number; // -1..1, down..up
118
- };
211
+ // Body — procedural motions, gestures, poses, animation clips
212
+ ref.current?.dispatchMotion('groove'); // looping procedural motion
213
+ ref.current?.stopMotion();
214
+ ref.current?.playGesture('thumbup'); // upstream hand gesture
215
+ ref.current?.playPose('oneknee'); // upstream pose template
216
+ ref.current?.playAnimation('/animations/wave.glb', { dur: 2 });
217
+ ref.current?.lookAt(120, 80, 500); // turn toward viewport coords
218
+ ```
219
+
220
+ The motion vocabulary (`groove`, `wave`, `nod`, `idle`, `attack`, `defend`, `celebrate`,
221
+ plus every upstream gesture/pose name) is exported as typed constants —
222
+ `MOTION_KEYS`, `TALKINGHEAD_GESTURES`, `TALKINGHEAD_POSES`, and the `isMotionKey()` guard —
223
+ from both the package root and `talking-head-studio/contract`.
224
+
225
+ ### Runtime capability matrix
226
+
227
+ Both renderers share one API; where native can't match the WebView's upstream rig, it
228
+ falls back to a procedural approximation rather than failing. This table is the honest gap
229
+ list.
230
+
231
+ | Feature | Web (iframe) | Native (WGPU) | Notes |
232
+ |---|:---:|:---:|---|
233
+ | Viseme schedules (`scheduleVisemes`) | ✅ | ✅ | Both consume `AgentVisemePayload`. |
234
+ | Amplitude jaw fallback (`sendAmplitude`) | ✅ | ⚠️ | Web drives jaw from amplitude; native exposes the method for API parity. |
235
+ | Core procedural motions (`groove`, `attack`, `defend`) | ✅ | ✅ | Shared `MOTION_DEFS` source of truth. |
236
+ | Gesture names (`thumbup`, `shrug`, …) | ✅ | ⚠️ | Web delegates to TalkingHead; native uses procedural approximations. |
237
+ | Pose names (`oneknee`, `kneel`, `sitting`, …) | ✅ | ⚠️ | Web delegates to TalkingHead; native uses static procedural poses. |
238
+ | Full mood vocabulary | ✅ | ✅ | All 8 upstream moods + friendly aliases. |
239
+ | External animation clips (`playAnimation`) | ✅ | ⚠️ | Web delegates to TalkingHead; native plays GLB clips via `AnimationMixer`. |
240
+ | Gaze (`lookAt`) | ✅ | ❌ | Native eye/head-gaze bridge is future work. |
241
+ | Listening / mic-reactive mouth | ⚠️ | ❌ | Web can route host-provided audio; native bridge not implemented. |
242
+
243
+ ---
244
+
245
+ ## Self-hosting the runtime assets
246
+
247
+ By default the web iframe pulls the TalkingHead rig, three.js, and the HeadAudio model
248
+ from public CDNs (jsDelivr, gstatic). To run fully self-hosted — no external CDN — vendor
249
+ those files and point the renderer at your own origin:
250
+
251
+ ```ts
252
+ import { buildAvatarHtml } from 'talking-head-studio/html';
253
+
254
+ const html = buildAvatarHtml({
255
+ avatarUrl: 'https://your-cdn/model.glb',
256
+ vendorBaseUrl: 'https://your-cdn/vendor', // serves three.module.js, talkinghead.mjs, etc.
257
+ // ...
258
+ });
259
+ ```
260
+
261
+ `vendorBaseUrl` replaces every CDN reference; `dracoDecoderUrl` overrides the DRACO decoder
262
+ location independently.
263
+
264
+ ---
265
+
266
+ ## FaceControl — the lower-level contract
267
+
268
+ If you're writing a custom backend or a game-engine integration, `FaceControl` is the
269
+ single value that flows between a voice pipeline and any avatar backend.
270
+
271
+ ```ts
272
+ import type { FaceControl, ExpressionState, HeadPose, EyeGaze } from 'talking-head-studio';
273
+
274
+ type HeadPose = { yaw: number; pitch: number; roll: number }; // each -1..1
275
+ type EyeGaze = { x: number; y: number }; // each -1..1
119
276
 
120
277
  type ExpressionState = {
121
- jawOpen: number; // 0..1
122
- mouthSmile: number;
123
- mouthFunnel: number;
124
- mouthPucker: number;
125
- mouthWide: number;
126
- upperLipRaise: number;
127
- lowerLipDepress: number;
128
- cheekRaise: number;
129
- blinkLeft: number;
130
- blinkRight: number;
131
- browInnerUp: number;
132
- browDownLeft: number;
133
- browDownRight: number;
134
- eyeGazeLeft: EyeGaze;
135
- eyeGazeRight: EyeGaze;
136
- };
278
+ jawOpen: number; mouthSmile: number; mouthFunnel: number; mouthPucker: number;
279
+ mouthWide: number; upperLipRaise: number; lowerLipDepress: number; cheekRaise: number;
280
+ blinkLeft: number; blinkRight: number; browInnerUp: number;
281
+ browDownLeft: number; browDownRight: number;
282
+ eyeGazeLeft: EyeGaze; eyeGazeRight: EyeGaze;
283
+ }; // all weights 0..1 unless noted
137
284
  ```
138
285
 
139
- ### Driving FaceControl from a viseme schedule
286
+ Drive it from a viseme schedule:
140
287
 
141
288
  ```ts
142
289
  import { useFaceControlsFromVisemes } from 'talking-head-studio';
143
290
 
144
- // schedule: AgentVisemePayload from your TTS backend
145
- const faceControl = useFaceControlsFromVisemes(schedule);
146
- // → { pose: { yaw:0, pitch:0, roll:0 }, expr: { jawOpen: 0.7, ... } }
291
+ const faceControl = useFaceControlsFromVisemes(schedule); // rAF-sampled FaceControl
147
292
  ```
148
293
 
149
- ### Implementing a custom backend
294
+ Or implement a backend against it:
150
295
 
151
296
  ```ts
152
297
  import type { AvatarBackend, AvatarRenderTarget, FaceControl } from 'talking-head-studio';
153
298
 
154
- class MyGaussianBackend implements AvatarBackend {
155
- initialize() { /* load splat data, FLAME weights */ }
156
- attach(target: AvatarRenderTarget) { /* bind to canvas/surface */ }
157
- setControl(control: FaceControl) { /* map ExpressionState → splat coefficients */ }
158
- renderFrame() { /* rasterize */ }
159
- dispose() { /* cleanup */ }
299
+ class MyBackend implements AvatarBackend {
300
+ initialize() {}
301
+ attach(target: AvatarRenderTarget) {}
302
+ setControl(control: FaceControl) {}
303
+ renderFrame() {}
304
+ dispose() {}
160
305
  }
161
306
  ```
162
307
 
163
- ---
164
-
165
- ## MorphTargetBackend — Three.js GLB adapter
308
+ ### MorphTargetBackend — the built-in Three.js adapter
166
309
 
167
- The first concrete `AvatarBackend` implementation. Give it any loaded Three.js scene and it will find morph targets, build a lookup cache, and drive them from `FaceControl`.
310
+ The concrete `AvatarBackend` for GLB-with-morphs. Hand it a loaded scene; it discovers
311
+ morph targets, builds a lookup cache, and drives them from `FaceControl`.
168
312
 
169
313
  ```ts
170
- import * as THREE from 'three';
171
- import { GLTFLoader } from 'three/examples/jsm/loaders/GLTFLoader';
172
- import { MorphTargetBackend } from 'talking-head-studio';
173
-
174
- const loader = new GLTFLoader();
175
- const gltf = await loader.loadAsync('/avatar.glb');
314
+ import { MorphTargetBackend, createNeutralExpression } from 'talking-head-studio';
176
315
 
177
316
  const backend = new MorphTargetBackend(gltf.scene, {
178
317
  mood: 'neutral',
@@ -184,79 +323,27 @@ const backend = new MorphTargetBackend(gltf.scene, {
184
323
  },
185
324
  });
186
325
 
187
- // Each frame:
188
326
  backend.setControl(faceControl);
189
327
  backend.renderFrame();
190
-
191
- // Debug: what morphs does this model actually have?
192
- console.log(backend.availableChannels);
193
- // → { visemes: ['aa','PP','oh',...], expressions: ['jawOpen','blinkLeft',...], gaze: ['lookLeft','lookUp'] }
328
+ console.log(backend.availableChannels); // what this model actually supports
194
329
  ```
195
330
 
196
- ---
197
-
198
- ## ARKit → Oculus remap
199
-
200
- Models with ARKit blend shapes (52 facial action units) but no Oculus viseme morphs can be remapped analytically — no ML, no FLAME, no artist work.
331
+ ### ARKit → Oculus remap (no ML, no artist work)
201
332
 
202
333
  ```ts
203
334
  import { remapArkitToOculus, getArkitWeightsForViseme } from 'talking-head-studio';
204
335
 
205
- // Runtime: face tracking data → Oculus viseme weights
206
- const oculusWeights = remapArkitToOculus({
207
- jawOpen: 0.7,
208
- mouthLowerDownLeft: 0.4,
209
- mouthLowerDownRight: 0.4,
210
- });
211
- // → { aa: 0.68, PP: 0.03, oh: 0.12, ... }
212
-
213
- // Bake-time: get the ARKit recipe for a specific viseme
214
- const recipe = getArkitWeightsForViseme('ou');
215
- // → { mouthPucker: 0.9, mouthRollLower: 0.3 }
336
+ remapArkitToOculus({ jawOpen: 0.7, mouthLowerDownLeft: 0.4 }); // { aa: 0.68, oh: 0.12, ... }
337
+ getArkitWeightsForViseme('ou'); // { mouthPucker: 0.9, ... }
216
338
  ```
217
339
 
218
- The full `ARKIT_TO_OCULUS` coefficient table is exported so you can build your own bake pipeline.
219
-
220
- ---
221
-
222
- ## TalkingHead component — props & ref
223
-
224
- ### Props
225
-
226
- | Prop | Type | Default | Description |
227
- |------|------|---------|-------------|
228
- | `avatarUrl` | `string` | required | Any `.glb`. Rigged or not. |
229
- | `authToken` | `string \| null` | `null` | Bearer token for authenticated GLB URLs. |
230
- | `mood` | `TalkingHeadMood` | `'neutral'` | `neutral \| happy \| sad \| angry \| excited \| thinking \| concerned \| surprised` |
231
- | `cameraView` | `'head' \| 'upper' \| 'full'` | `'upper'` | Framing preset. |
232
- | `cameraDistance` | `number` | `-0.5` | Zoom offset. Negative = closer. |
233
- | `hairColor` | `string` | — | Hex color. Applied to materials named `hair`, `fur`. |
234
- | `skinColor` | `string` | — | Applied to `skin`, `body`, `face`. |
235
- | `eyeColor` | `string` | — | Applied to `eye`, `iris`. |
236
- | `accessories` | `TalkingHeadAccessory[]` | `[]` | Bone-attached GLB items. |
237
- | `onReady` | `() => void` | — | Fired when fully loaded. |
238
- | `onError` | `(msg: string) => void` | — | Fired on load failure. |
239
- | `style` | `ViewStyle / CSSProperties` | — | Container style. |
240
-
241
- ### Ref methods
242
-
243
- ```ts
244
- ref.current?.sendAmplitude(0.7); // amplitude 0..1 → jaw
245
- ref.current?.scheduleVisemes(payload); // AgentVisemePayload → full lip-sync schedule
246
- ref.current?.clearVisemes();
247
- ref.current?.setMood('excited');
248
- ref.current?.setHairColor('#ff0000');
249
- ref.current?.setSkinColor('#8d5524');
250
- ref.current?.setEyeColor('#2e86de');
251
- ref.current?.setAccessories([...]);
252
- ref.current?.dispatchMotion('nod');
253
- ```
340
+ The full `ARKIT_TO_OCULUS` coefficient table is exported for building your own bake pipeline.
254
341
 
255
342
  ---
256
343
 
257
344
  ## Accessories
258
345
 
259
- Any GLB attached to any skeleton bone. Placement is editable at runtime via the 3D editor.
346
+ Any GLB attached to any skeleton bone, placeable at runtime.
260
347
 
261
348
  ```ts
262
349
  interface TalkingHeadAccessory {
@@ -269,59 +356,56 @@ interface TalkingHeadAccessory {
269
356
  }
270
357
  ```
271
358
 
272
- Common Mixamo bones: `Head, Neck, Spine, Spine1, Spine2, LeftHand, RightHand, LeftFoot, RightFoot, Hips`
273
-
274
- The 3D editor (`talking-head-studio/editor`) provides a gizmo for live placement with front/top/side views. LLM-assisted placement is available via the companion backend.
359
+ Common Mixamo bones: `Head, Neck, Spine, Spine1, Spine2, LeftHand, RightHand, LeftFoot, RightFoot, Hips`.
360
+ The 3D editor (`talking-head-studio/editor`, web only) provides a gizmo for live placement.
275
361
 
276
362
  ---
277
363
 
278
- ## Packages
364
+ ## Subpath exports
279
365
 
280
- | Path | Description |
366
+ | Import | Description |
281
367
  |------|-------------|
282
- | `talking-head-studio` | Live avatar renderer + FaceControl contracts |
283
- | `talking-head-studio/editor` | R3F-based 3D editor with gizmo (web only) |
368
+ | `talking-head-studio` | Avatar component + `FaceControl` contracts + motion constants |
369
+ | `talking-head-studio/contract` | Stable type-only entrypoint — visemes, FaceControl, backends, motion |
370
+ | `talking-head-studio/html` | `buildAvatarHtml()` for self-hosted / custom iframe embedding |
371
+ | `talking-head-studio/wgpu` | React Native WebGPU renderer (`WgpuAvatar`) |
372
+ | `talking-head-studio/editor` | R3F 3D editor with placement gizmo (web only) |
284
373
  | `talking-head-studio/appearance` | Material color system for any GLB |
285
374
  | `talking-head-studio/voice` | Audio recording + WAV conversion hooks |
286
375
  | `talking-head-studio/sketchfab` | Sketchfab search + download hooks |
287
376
  | `talking-head-studio/api` | Studio API client (avatar CRUD, voice profiles) |
288
377
  | `talking-head-studio/wardrobe` | Accessory + outfit state management |
289
- | `talking-head-studio/wgpu` | React Native wgpu renderer |
290
- | `packages/avatar-creator` | Embeddable avatar creator widget |
291
- | `packages/agent-avatar` | LiveKit agent + MCP integration |
378
+
379
+ Workspace packages (`packages/avatar-creator`, `packages/agent-avatar`) ship an embeddable
380
+ creator widget and a LiveKit + MCP agent integration.
292
381
 
293
382
  ---
294
383
 
295
384
  ## Roadmap
296
385
 
297
- ### Now shipped
298
- - `FaceControl` — canonical face control space (pose + expression + gaze)
299
- - `AvatarBackend` interface — swap renderers without changing upstream code
300
- - `MorphTargetBackend` — Three.js GLB adapter with morph target discovery and mood layering
301
- - ARKit → Oculus analytical remap (`remapArkitToOculus`, full coefficient table)
302
- - `useFaceControlsFromVisemes` — rAF-sampled hook from `AgentVisemePayload`
303
- - `AgentVisemePayload` — canonical TTS lip-sync wire format
304
- - `AvatarGlbParams` — typed API contract for quality/compression/morph group selection
305
- - `CalibrationProfile` — per-avatar range remapping and gaze limits
306
- - Platform type stubs: SDK (web/Unity/Unreal), marketplace catalog, avatar GLB API
307
- - `packages/avatar-creator` — embeddable creator widget with preset catalog
308
- - `packages/agent-avatar` — LiveKit agent + MCP tool integration
309
-
310
- ### Next
311
- - **GLB schema walker** — scan any loaded GLB and report: morph target coverage, skeleton bones, LODs, viseme tier. Prerequisite for the validator and import pipeline.
312
- - **`GET /avatars/{id}.glb` with `AvatarGlbParams`** — extend the companion backend to serve quality/compression/morph-group variants on the existing endpoint.
313
- - **Creator postMessage bridge** — let partners embed the avatar creator in an iframe and receive avatar IDs back, like RPM's WebView creator.
314
-
315
- ### Medium term
316
- - **`GaussianBackend`** — Gaussian splat renderer implementing `AvatarBackend`. Takes any model, scans it, drives expression via FLAME-based per-viseme delta transfer. No artist work, no blend shapes required. This is the zero-prerequisite lip-sync path.
317
- - **FLAME viseme transfer pipeline** (Python, companion backend) — fit FLAME to a face screenshot, generate Oculus viseme deltas, bake back into the GLB as morph targets. Background task on upload for any avatar missing viseme morphs.
318
- - **Unity SDK** — C# plugin implementing the `AvatarBackend` contract. Blueprint-friendly API for loading GLBs, driving morphs, consuming `AgentVisemePayload`.
319
- - **Unreal plugin** — UE5 plugin with Blueprint-accessible `UAvatarDescriptor` and a sample Quickstart map.
320
-
321
- ### Longer term
322
- - Avatar marketplace — `CatalogItem`, `AvatarAsset`, `RarityLevel` types are already defined. Backend + web store + in-creator purchasing.
323
- - RPM migration tools — import existing RPM avatars where technically possible.
324
- - SLA + deprecation policy — for teams that need a reliability guarantee as they move off RPM.
386
+ > **Status legend:** shipped · 🔜 in progress · 🧪 designed, not yet built
387
+
388
+ **Shipped today**
389
+ - `FaceControl` face-control space (pose + expression + gaze) and `AvatarBackend` interface
390
+ - `MorphTargetBackend` — GLB morph discovery + mood layering
391
+ - ARKit → Oculus analytical remap with full coefficient table
392
+ - `AgentVisemePayload` viseme schedule format + `scheduleVisemes` on both renderers
393
+ - Shared procedural motion engine (web + native WGPU), gestures, poses, animation clips
394
+ - Self-hosting via `buildAvatarHtml({ vendorBaseUrl })`
395
+ - `packages/avatar-creator`, `packages/agent-avatar`
396
+
397
+ **In progress**
398
+ - 🔜 Native (WGPU) gaze bridge (`lookAt`) and mic-reactive listening
399
+ - 🔜 GLB schema walker — report morph coverage, bones, LODs, viseme tier for any model
400
+
401
+ **Designed, not yet built**
402
+ - 🧪 `GaussianBackend` — Gaussian-splat renderer + FLAME per-viseme delta transfer, so a
403
+ model with *no* face rig still gets excellent lip-sync. This is the zero-prerequisite path.
404
+ - 🧪 FLAME viseme transfer pipeline (companion backend) — bake Oculus visemes into a GLB
405
+ that lacks them
406
+ - 🧪 Unity / Unreal SDKs implementing the same `AvatarBackend` contract
407
+ - 🧪 Avatar marketplace + RPM import tooling (`CatalogItem` / `AvatarAsset` types exist;
408
+ backend and store do not)
325
409
 
326
410
  ---
327
411
 
@@ -331,14 +415,16 @@ The 3D editor (`talking-head-studio/editor`) provides a gizmo for live placement
331
415
  git clone https://github.com/sitebay/talking-head-studio.git
332
416
  cd talking-head-studio
333
417
  npm install
334
- npm run typecheck # must be clean (excluding known expo-audio peer dep warnings)
418
+ npm run typecheck # must be clean
335
419
  npm test
336
420
  ```
337
421
 
338
- The repo is a monorepo with `packages/*` as npm workspaces. The main library is the root package.
422
+ Monorepo with `packages/*` as npm workspaces; the main library is the root package. The
423
+ publish gate (`prepublishOnly`) runs lint, typecheck, tests, and metadata checks.
339
424
 
340
425
  ---
341
426
 
342
- ## License
427
+ ## Credits & license
343
428
 
344
- MIT
429
+ Built on [met4citizen/TalkingHead](https://github.com/met4citizen/TalkingHead) (rig +
430
+ gestures/poses on the web path) and [Three.js](https://threejs.org). MIT licensed.
@@ -1,7 +1,8 @@
1
1
  import React from 'react';
2
2
  import { type StyleProp, type ViewStyle } from 'react-native';
3
- export type TalkingHeadMood = 'neutral' | 'happy' | 'sad' | 'angry' | 'excited' | 'thinking' | 'concerned' | 'surprised';
3
+ export type TalkingHeadMood = 'neutral' | 'happy' | 'sad' | 'angry' | 'fear' | 'disgust' | 'love' | 'sleep' | 'excited' | 'thinking' | 'concerned' | 'surprised';
4
4
  import type { AgentVisemePayload, OculusViseme, VisemeCue } from './core/avatar/visemes';
5
+ import type { MotionKey, TalkingHeadGesture, TalkingHeadPose } from './core/avatar/motion';
5
6
  export type TalkingHeadLoadingStage = 'booting' | 'fetching_model' | 'loading_avatar' | 'loading_fallback' | 'ready';
6
7
  export interface TalkingHeadLoadingState {
7
8
  stage: TalkingHeadLoadingStage;
@@ -88,8 +89,32 @@ export interface TalkingHeadRef {
88
89
  setSkinColor: (color: string) => void;
89
90
  setEyeColor: (color: string) => void;
90
91
  setAccessories: (accessories: TalkingHeadAccessory[]) => void;
91
- /** Dispatch a named motion/gesture to the avatar (e.g. 'wave_right', 'dance_idle'). */
92
- dispatchMotion: (name: string) => void;
92
+ /** Play a procedural motion (e.g. 'attack', 'defend', 'groove'). */
93
+ dispatchMotion(name: MotionKey): void;
94
+ dispatchMotion(name: string): void;
95
+ /** Stop the current procedural motion and return to rest. */
96
+ stopMotion: () => void;
97
+ /** Play an upstream TalkingHead hand gesture (e.g. 'thumbup'). */
98
+ playGesture: (name: TalkingHeadGesture | string, opts?: {
99
+ dur?: number;
100
+ mirror?: boolean;
101
+ ms?: number;
102
+ }) => void;
103
+ /** Stop the current gesture, easing out over `ms`. */
104
+ stopGesture: (ms?: number) => void;
105
+ /** Strike a pose — a built-in template name (e.g. 'oneknee') or a pose-file URL. */
106
+ playPose: (urlOrTemplate: TalkingHeadPose | string, dur?: number) => void;
107
+ /** Release the current pose and return to the default stance. */
108
+ stopPose: () => void;
109
+ /** Play a full body animation from a GLB/FBX URL (e.g. a combat move). */
110
+ playAnimation: (url: string, opts?: {
111
+ dur?: number;
112
+ index?: number;
113
+ }) => void;
114
+ /** Stop the current body animation. */
115
+ stopAnimation: () => void;
116
+ /** Turn head/eyes toward viewport coordinates (px), easing over `ms`. */
117
+ lookAt: (x: number, y: number, ms?: number) => void;
93
118
  }
94
119
  /** @deprecated Use AvatarPlayerRef */
95
120
  export type TalkingHeadRefAlias = TalkingHeadRef;