metaverse-avatar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/ASSET_LICENSES.md +122 -0
  2. package/Avatar.js +860 -0
  3. package/LICENSE.md +21 -0
  4. package/README.md +407 -0
  5. package/anims/UAL1_Standard.glb +0 -0
  6. package/anims/UAL2_Standard.glb +0 -0
  7. package/anims/pirouette.bvh +867 -0
  8. package/attachments.js +388 -0
  9. package/avatarManager.js +110 -0
  10. package/blink.js +58 -0
  11. package/bvh.js +110 -0
  12. package/gltfAnim.js +271 -0
  13. package/index.js +61 -0
  14. package/licenses/AGPL-3.0.txt +661 -0
  15. package/models/body.glb +0 -0
  16. package/models/eyes.glb +0 -0
  17. package/models/feet.glb +0 -0
  18. package/models/hands.glb +0 -0
  19. package/models/head.glb +0 -0
  20. package/models/textures/android_face.png +0 -0
  21. package/models/textures/android_face_ao.jpg +0 -0
  22. package/models/textures/android_face_metallic.jpg +0 -0
  23. package/models/textures/android_face_normal.jpg +0 -0
  24. package/models/textures/android_face_roughness.jpg +0 -0
  25. package/models/textures/android_lower.png +0 -0
  26. package/models/textures/android_lower_ao.jpg +0 -0
  27. package/models/textures/android_lower_metallic.jpg +0 -0
  28. package/models/textures/android_lower_normal.jpg +0 -0
  29. package/models/textures/android_lower_roughness.jpg +0 -0
  30. package/models/textures/android_upper.png +0 -0
  31. package/models/textures/android_upper_ao.jpg +0 -0
  32. package/models/textures/android_upper_metallic.jpg +0 -0
  33. package/models/textures/android_upper_normal.jpg +0 -0
  34. package/models/textures/android_upper_roughness.jpg +0 -0
  35. package/models/textures/blue_eyes.png +0 -0
  36. package/models/textures/layers/cute_pants.png +0 -0
  37. package/models/textures/layers/cute_shirt.png +0 -0
  38. package/nipple.js +218 -0
  39. package/package.json +48 -0
  40. package/pbr.js +225 -0
  41. package/physics.js +313 -0
  42. package/skeleton.js +70 -0
  43. package/sliders.js +590 -0
  44. package/speech.js +130 -0
  45. package/visemes.js +66 -0
  46. package/voice.js +75 -0
package/speech.js ADDED
@@ -0,0 +1,130 @@
1
+ // Audio-clip-driven lip sync (the playback counterpart to VoiceMouth).
2
+ //
3
+ // Plays a speech clip (xAI TTS audio relayed by the MCP server) out loud
4
+ // through Web Audio. When a viseme timing file accompanies the clip, the
5
+ // mouth is shaped per-viseme from the precise playback time; otherwise it
6
+ // falls back to the microphone-style AnalyserNode RMS → jaw mapping. One
7
+ // clip plays at a time; starting a new one cancels the previous.
8
+
9
+ import { buildVisemeTimeline, sampleViseme } from './visemes.js';
10
+
11
/**
 * Plays a speech clip through Web Audio and drives the avatar's mouth from it.
 *
 * Two modes, chosen per clip:
 *  - viseme mode: a non-empty viseme timeline was loaded, so the mouth shape is
 *    sampled from the clip's current playback time;
 *  - RMS fallback: no usable timeline, so the jaw follows the clip's
 *    time-domain RMS through a gate and an attack/release envelope.
 *
 * Only one clip is owned at a time. Every play()/stop() bumps an internal
 * counter so that an earlier play() still waiting on an await can tell it has
 * been superseded and back out without touching the newer clip's state.
 */
export class SpeechMouth {
  /**
   * @param {object} avatar - Mouth target. `setMouthOpen(amount)` is required;
   *   `setMouth({ open, round, wide })` is used when the avatar provides it.
   */
  constructor(avatar) {
    this.avatar = avatar;
    this.active = false;
    this.level = 0; // smoothed jaw amount 0..1 (RMS fallback)

    // RMS-fallback envelope (matches VoiceMouth).
    this.gain = 17;
    this.maxOpen = 1;
    this.gate = 0.015;
    this.attack = 35;
    this.release = 12;

    this._ctx = null; // reused across clips (browsers cap AudioContexts)
    this._audio = null;
    this._src = null;
    this._analyser = null;
    this._data = null;

    this._timeline = null; // viseme segments, or null
    this._m = { open: 0, round: 0, wide: 0 }; // smoothed viseme controls
    this._out = { open: 0, round: 0, wide: 0 }; // scratch passed to setMouth

    this._playId = 0; // bumped by every stop(); identifies the current clip
    this._cancelLoad = null; // releases a play() that is waiting for metadata
  }

  /**
   * Play the clip at `url` and lip-sync to it. If `visemeUrl` is given, the
   * viseme timing file is fetched and used for per-viseme mouth shapes; if it
   * cannot be loaded or yields no segments, the amplitude fallback is used.
   *
   * @param {string} url - Audio clip URL.
   * @param {string} [visemeUrl] - Optional viseme timing file URL.
   * @returns {Promise<{ duration: number|null, visemes: boolean }>} Resolves
   *   once playback begins. `duration` is in seconds, or null if unknown.
   *   A call that was superseded by a later play()/stop() before its clip
   *   started resolves with `{ duration: null, visemes: false }`.
   * @throws {Error} If the audio fails to load, or if the element's play()
   *   promise rejects (the rejection is rethrown unchanged).
   */
  async play(url, visemeUrl) {
    this.stop();
    const playId = this._playId;
    const isStale = () => playId !== this._playId;

    if (!this._ctx) this._ctx = new (window.AudioContext || window.webkitAudioContext)();
    await this._ctx.resume(); // may stay suspended without a prior user gesture
    if (isStale()) return { duration: null, visemes: false };

    // Viseme timing is fetched in parallel with audio setup; same-origin.
    const timelinePromise = visemeUrl ? this._loadTimeline(visemeUrl) : Promise.resolve(null);

    const audio = new Audio();
    audio.crossOrigin = 'anonymous';
    audio.src = url;

    const src = this._ctx.createMediaElementSource(audio);
    const analyser = this._ctx.createAnalyser();
    analyser.fftSize = 1024;
    analyser.smoothingTimeConstant = 0.4;
    src.connect(analyser);
    analyser.connect(this._ctx.destination); // route to speakers + the meter
    // Registered synchronously (no await since the staleness check) so that a
    // later stop() always sees and tears down this clip.
    this._audio = audio;
    this._src = src;
    this._analyser = analyser;
    this._data = new Uint8Array(analyser.fftSize);

    let loaded = false;
    try {
      loaded = await new Promise((resolve, reject) => {
        audio.addEventListener('loadedmetadata', () => resolve(true), { once: true });
        audio.addEventListener('error', () => reject(new Error(`failed to load audio: ${url}`)), { once: true });
        // stop() calls this so a cancelled load does not leave us pending.
        this._cancelLoad = () => resolve(false);
      });
    } catch (err) {
      if (!isStale()) this.stop(); // do not leave a dead clip wired into the graph
      throw err;
    }
    if (!loaded || isStale()) return { duration: null, visemes: false };
    this._cancelLoad = null;

    // Only the clip that is still current may reset the mouth when it ends.
    audio.addEventListener('ended', () => {
      if (this._audio === audio) this.stop();
    }, { once: true });

    const timeline = await timelinePromise;
    if (isStale()) return { duration: null, visemes: false };

    this._timeline = timeline;
    this._m.open = this._m.round = this._m.wide = 0;
    this.active = true;
    try {
      await audio.play();
    } catch (err) {
      if (!isStale()) this.stop(); // playback never started; leave no "active" clip behind
      throw err;
    }
    return { duration: Number.isFinite(audio.duration) ? audio.duration : null, visemes: !!this._timeline };
  }

  /**
   * Fetch and build the viseme timeline. Best effort by design: any failure
   * (network error, non-OK status, invalid JSON) or an empty result yields
   * null so playback falls back to amplitude-driven lip sync.
   *
   * @param {string} visemeUrl
   * @returns {Promise<Array|null>} Non-empty timeline, or null.
   */
  async _loadTimeline(visemeUrl) {
    try {
      const response = await fetch(visemeUrl);
      if (!response.ok) return null;
      const timeline = buildVisemeTimeline(await response.json());
      // An empty array is truthy and would pin the mouth shut in viseme mode.
      return timeline && timeline.length > 0 ? timeline : null;
    } catch {
      return null;
    }
  }

  /**
   * Stop the current clip (if any), cancel any play() still in progress,
   * release the per-clip audio nodes and return the mouth to rest. The
   * AudioContext is kept for reuse.
   */
  stop() {
    this._playId++; // invalidates every play() that is still awaiting
    const cancelLoad = this._cancelLoad;
    this._cancelLoad = null;
    if (cancelLoad) cancelLoad();

    if (this._audio) {
      this._audio.pause();
      this._audio.removeAttribute('src');
      this._audio.load(); // with no src this aborts the pending download
    }
    this._src?.disconnect();
    this._analyser?.disconnect();
    this._audio = null;
    this._src = null;
    this._analyser = null;
    this._data = null;
    this._timeline = null;
    this._m.open = this._m.round = this._m.wide = 0;
    this.active = false;
    this.level = 0;
    this.avatar.setMouth?.({ open: 0, round: 0, wide: 0 });
    this.avatar.setMouthOpen(0);
  }

  /**
   * Advance the lip sync by one frame. No-op unless a clip is active.
   *
   * @param {number} dt - Seconds since the previous frame; clamped to 0.1 so
   *   a long stall cannot overshoot the smoothing.
   */
  update(dt) {
    if (!this.active) return;
    const step = Math.min(dt, 0.1);

    // Viseme mode: shape the mouth from the precise playback time.
    if (this._timeline) {
      const t = this._audio ? this._audio.currentTime : 0;
      const target = sampleViseme(this._timeline, t);
      const k = 1 - Math.exp(-22 * step); // smooth coarticulation
      this._m.open += (target.open - this._m.open) * k;
      this._m.round += (target.round - this._m.round) * k;
      this._m.wide += (target.wide - this._m.wide) * k;
      // "Max open" caps the jaw; lip round/spread shapes stay full so visemes
      // remain legible even at a low cap.
      this._out.open = this._m.open * this.maxOpen;
      this._out.round = this._m.round;
      this._out.wide = this._m.wide;
      // stop() treats setMouth as optional, so do the same here and fall back
      // to the jaw-only control rather than throwing every frame.
      if (this.avatar.setMouth) this.avatar.setMouth(this._out);
      else this.avatar.setMouthOpen(this._out.open);
      return;
    }

    // Fallback: amplitude-driven jaw.
    if (!this._analyser) return;
    this._analyser.getByteTimeDomainData(this._data);
    let sum = 0;
    for (let i = 0; i < this._data.length; i++) {
      const v = (this._data[i] - 128) / 128; // unsigned byte → -1..1
      sum += v * v;
    }
    const rms = Math.sqrt(sum / this._data.length);
    const target = Math.min(1, Math.max(0, (rms - this.gate) * this.gain));
    const k = target > this.level ? this.attack : this.release;
    this.level += (target - this.level) * (1 - Math.exp(-k * step));
    this.avatar.setMouthOpen(this.level * this.maxOpen);
  }
}
package/visemes.js ADDED
@@ -0,0 +1,66 @@
1
+ // Viseme model for speech lip-sync.
2
+ //
3
+ // xAI TTS returns per-character timing (graph_chars + graph_times). We map
4
+ // each grapheme to one of a small set of visemes, each a target for three
5
+ // mouth controls (consumed by RuthAvatar.setMouth):
6
+ // open — jaw drop (vowels)
7
+ // round — lip pucker / protrude (o, u, w)
8
+ // wide — lip spread (e, i)
9
+ //
10
+ // English spelling → phoneme is irregular, so this is a heuristic grapheme
11
+ // mapping, not a phonemizer — but with correct timing and real closures
12
+ // (m/b/p shut the lips) it reads far better than amplitude-only jaw motion.
13
+
14
// Viseme name → target mouth controls. Each entry is a plain
// { open, round, wide } record with values in 0..1.
export const VISEMES = (() => {
  // Positional shorthand so the table below reads as columns: open, round, wide.
  const shape = (open, round, wide) => ({ open, round, wide });
  return {
    sil: shape(0.0, 0.0, 0.0), // silence / rest (closed)
    PP: shape(0.0, 0.0, 0.05), // p b m — lips closed
    FF: shape(0.12, 0.0, 0.25), // f v
    TH: shape(0.18, 0.0, 0.20), // th
    DD: shape(0.25, 0.0, 0.35), // generic consonant (d t n s z l c k g …)
    RR: shape(0.22, 0.35, 0.0), // r
    AA: shape(0.90, 0.0, 0.20), // a
    E: shape(0.45, 0.0, 0.60), // e
    I: shape(0.28, 0.0, 0.80), // i y
    O: shape(0.55, 0.70, 0.0), // o
    U: shape(0.32, 0.95, 0.0), // u w
  };
})();
27
+
28
// Single lowercase letter → viseme name. Anything not listed is treated as
// silence by charToViseme.
const CHAR_VISEME = {
  a: 'AA', e: 'E', i: 'I', o: 'O', u: 'U', y: 'I', w: 'U',
  m: 'PP', b: 'PP', p: 'PP',
  f: 'FF', v: 'FF',
  r: 'RR',
  // every other consonant collapses to a generic open-consonant shape
  d: 'DD', t: 'DD', n: 'DD', s: 'DD', z: 'DD', l: 'DD', c: 'DD',
  k: 'DD', g: 'DD', h: 'DD', j: 'DD', q: 'DD', x: 'DD',
};

/**
 * Map one grapheme to a viseme name.
 *
 * The letter is lower-cased and stripped of combining marks first, so
 * accented letters ("é", "ü", "ñ") use their base letter's shape instead of
 * closing the mouth mid-word.
 *
 * @param {string|null|undefined} ch - A single character.
 * @returns {string} A viseme name; 'sil' for spaces, punctuation, digits,
 *   multi-letter strings and non-string input.
 */
export function charToViseme(ch) {
  if (typeof ch !== 'string') return 'sil';
  // NFD splits "é" into "e" + a combining accent, which is then dropped.
  const key = ch.normalize('NFD').replace(/\p{M}+/gu, '').toLowerCase();
  // Own-property check: a bare CHAR_VISEME[key] would reach Object.prototype
  // for keys such as "constructor" or "__proto__" and return a non-string.
  return Object.prototype.hasOwnProperty.call(CHAR_VISEME, key) ? CHAR_VISEME[key] : 'sil';
}
41
+
42
/**
 * Build a time-ordered list of { start, end, viseme } segments from xAI's
 * audio_timestamps ({ graph_chars, graph_times: [[start, end], …] }).
 *
 * Characters whose timing entry is missing or has a non-finite start are
 * skipped. A missing, non-finite or earlier-than-start end collapses to the
 * start time. The result is sorted by start time (stable, so input that is
 * already ordered comes back unchanged) because sampleViseme stops scanning
 * at the first segment that has not started yet.
 *
 * @param {{ graph_chars?: ArrayLike<string>, graph_times?: Array<[number, number]> }|null|undefined} timestamps
 * @returns {Array<{ start: number, end: number, viseme: string }>} Possibly
 *   empty list of segments in ascending start order.
 */
export function buildVisemeTimeline(timestamps) {
  const chars = timestamps?.graph_chars ?? [];
  const times = timestamps?.graph_times ?? [];
  const segs = [];
  for (let i = 0; i < chars.length; i++) {
    const span = times[i];
    // Number.isFinite also rejects NaN/Infinity, which `typeof === 'number'`
    // lets through and which would make the sampler stop at that segment.
    if (!span || !Number.isFinite(span[0])) continue;
    const start = span[0];
    const end = Number.isFinite(span[1]) && span[1] >= start ? span[1] : start;
    segs.push({ start, end, viseme: charToViseme(chars[i]) });
  }
  return segs.sort((a, b) => a.start - b.start);
}
55
+
56
/**
 * Look up the target mouth controls for playback time `t`.
 *
 * The segment in force is the last one, scanning from the front, whose start
 * is not after `t`; scanning stops at the first segment that starts later.
 * That segment's viseme is held until 0.12 s past its end, after which the
 * mouth relaxes to the rest shape (pauses between words).
 *
 * @param {Array<{ start: number, end: number, viseme: string }>|null|undefined} timeline
 * @param {number} t - Playback time in seconds.
 * @returns {{ open: number, round: number, wide: number }} An entry of
 *   VISEMES (the shared object, not a copy); VISEMES.sil when nothing applies
 *   or the viseme name is unknown.
 */
export function sampleViseme(timeline, t) {
  if (!timeline || timeline.length === 0) return VISEMES.sil;

  // Advance past every leading segment that has already started.
  let next = 0;
  while (next < timeline.length && timeline[next].start <= t) next += 1;
  if (next === 0) return VISEMES.sil; // nothing has started yet

  const current = timeline[next - 1];
  if (t > current.end + 0.12) return VISEMES.sil;
  return VISEMES[current.viseme] ?? VISEMES.sil;
}
package/voice.js ADDED
@@ -0,0 +1,75 @@
1
+ // Microphone-driven lip sync.
2
+ //
3
+ // Taps the mic through a Web Audio AnalyserNode, measures the time-domain
4
+ // RMS each frame, gates out background noise, and maps the result to a jaw
5
+ // open amount with a fast attack / slower release envelope so the mouth
6
+ // tracks speech without chattering. The smoothed level is pushed to
7
+ // RuthAvatar.setMouthOpen every frame.
8
+
9
/**
 * Drives the avatar's jaw from the microphone: each frame the time-domain RMS
 * of the mic signal is gated, scaled and smoothed with an attack/release
 * envelope, then pushed to avatar.setMouthOpen.
 *
 * start() is asynchronous (permission prompt, context resume). Every
 * start()/stop() bumps an internal counter so a start() that is still
 * awaiting can detect it was cancelled or superseded and release whatever it
 * acquired instead of switching the microphone on behind the caller's back.
 */
export class VoiceMouth {
  /**
   * @param {object} avatar - Mouth target; must provide setMouthOpen(amount).
   */
  constructor(avatar) {
    this.avatar = avatar;
    this.active = false;
    this.level = 0; // smoothed jaw amount 0..1 (also the meter value)

    this.gain = 17; // rms → open scale (Sensitivity slider)
    this.maxOpen = 1; // jaw cap 0..1 (Max open slider)
    this.gate = 0.015; // noise floor below which the mouth stays shut
    this.attack = 35; // 1/s — open quickly
    this.release = 12; // 1/s — close more slowly

    this._stream = null;
    this._ctx = null;
    this._src = null;
    this._analyser = null;
    this._data = null;

    this._startId = 0; // bumped by start() and stop(); identifies the live attempt
  }

  /**
   * Request the microphone and begin metering it. No-op when already active.
   *
   * If stop() or another start() is called while this one is still waiting,
   * this call releases anything it acquired and resolves without activating.
   *
   * @returns {Promise<void>}
   * @throws {Error} If getUserMedia is unavailable; rejections from
   *   getUserMedia, AudioContext construction or resume() propagate unchanged
   *   (after the microphone has been released).
   */
  async start() {
    if (this.active) return;
    if (!navigator.mediaDevices?.getUserMedia) {
      throw new Error('microphone not available in this browser');
    }
    const startId = ++this._startId;
    const isStale = () => startId !== this._startId;

    const stream = await navigator.mediaDevices.getUserMedia({
      audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: false },
    });
    if (isStale()) {
      this._release(stream, null); // cancelled while the permission prompt was open
      return;
    }

    let ctx = null;
    try {
      ctx = new (window.AudioContext || window.webkitAudioContext)();
      await ctx.resume();
    } catch (err) {
      this._release(stream, ctx); // do not leave the mic live after a failed start
      throw err;
    }
    if (isStale()) {
      this._release(stream, ctx);
      return;
    }

    const src = ctx.createMediaStreamSource(stream);
    const analyser = ctx.createAnalyser();
    analyser.fftSize = 1024;
    analyser.smoothingTimeConstant = 0.4;
    src.connect(analyser); // metering only — not routed to the speakers

    // Publish state only once everything succeeded.
    this._stream = stream;
    this._ctx = ctx;
    this._src = src;
    this._analyser = analyser;
    this._data = new Uint8Array(analyser.fftSize);
    this.active = true;
  }

  /**
   * Stop metering, cancel any start() still in progress, release the
   * microphone and audio context, and close the mouth.
   */
  stop() {
    this._startId++; // invalidates any start() that is still awaiting
    this._src?.disconnect();
    this._release(this._stream, this._ctx);
    this._stream = null;
    this._ctx = null;
    this._src = null;
    this._analyser = null;
    this._data = null;
    this.active = false;
    this.level = 0;
    this.avatar.setMouthOpen(0);
  }

  /**
   * Stop every track of `stream` and close `ctx`; either may be null.
   * Failures while closing the context are ignored: this is teardown and
   * there is nothing further to undo.
   */
  _release(stream, ctx) {
    if (stream) for (const track of stream.getTracks()) track.stop();
    if (ctx) {
      try {
        // close() returns a promise; swallow its rejection so teardown never
        // surfaces as an unhandled rejection.
        Promise.resolve(ctx.close()).catch(() => {});
      } catch {
        // close() threw synchronously — nothing left to release.
      }
    }
  }

  /**
   * Advance the envelope by one frame and push the jaw amount to the avatar.
   * No-op unless the microphone is active.
   *
   * @param {number} dt - Seconds since the previous frame; clamped to 0.1.
   */
  update(dt) {
    if (!this.active || !this._analyser) return;
    this._analyser.getByteTimeDomainData(this._data);

    let sum = 0;
    for (let i = 0; i < this._data.length; i++) {
      const v = (this._data[i] - 128) / 128; // unsigned byte → -1..1
      sum += v * v;
    }
    const rms = Math.sqrt(sum / this._data.length);

    const target = Math.min(1, Math.max(0, (rms - this.gate) * this.gain));
    const k = target > this.level ? this.attack : this.release;
    this.level += (target - this.level) * (1 - Math.exp(-k * Math.min(dt, 0.1)));

    this.avatar.setMouthOpen(this.level * this.maxOpen);
  }
}