metaverse-avatar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ASSET_LICENSES.md +122 -0
- package/Avatar.js +860 -0
- package/LICENSE.md +21 -0
- package/README.md +407 -0
- package/anims/UAL1_Standard.glb +0 -0
- package/anims/UAL2_Standard.glb +0 -0
- package/anims/pirouette.bvh +867 -0
- package/attachments.js +388 -0
- package/avatarManager.js +110 -0
- package/blink.js +58 -0
- package/bvh.js +110 -0
- package/gltfAnim.js +271 -0
- package/index.js +61 -0
- package/licenses/AGPL-3.0.txt +661 -0
- package/models/body.glb +0 -0
- package/models/eyes.glb +0 -0
- package/models/feet.glb +0 -0
- package/models/hands.glb +0 -0
- package/models/head.glb +0 -0
- package/models/textures/android_face.png +0 -0
- package/models/textures/android_face_ao.jpg +0 -0
- package/models/textures/android_face_metallic.jpg +0 -0
- package/models/textures/android_face_normal.jpg +0 -0
- package/models/textures/android_face_roughness.jpg +0 -0
- package/models/textures/android_lower.png +0 -0
- package/models/textures/android_lower_ao.jpg +0 -0
- package/models/textures/android_lower_metallic.jpg +0 -0
- package/models/textures/android_lower_normal.jpg +0 -0
- package/models/textures/android_lower_roughness.jpg +0 -0
- package/models/textures/android_upper.png +0 -0
- package/models/textures/android_upper_ao.jpg +0 -0
- package/models/textures/android_upper_metallic.jpg +0 -0
- package/models/textures/android_upper_normal.jpg +0 -0
- package/models/textures/android_upper_roughness.jpg +0 -0
- package/models/textures/blue_eyes.png +0 -0
- package/models/textures/layers/cute_pants.png +0 -0
- package/models/textures/layers/cute_shirt.png +0 -0
- package/nipple.js +218 -0
- package/package.json +48 -0
- package/pbr.js +225 -0
- package/physics.js +313 -0
- package/skeleton.js +70 -0
- package/sliders.js +590 -0
- package/speech.js +130 -0
- package/visemes.js +66 -0
- package/voice.js +75 -0
package/speech.js
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// Audio-clip-driven lip sync (the playback counterpart to VoiceMouth).
|
|
2
|
+
//
|
|
3
|
+
// Plays a speech clip (xAI TTS audio relayed by the MCP server) out loud
|
|
4
|
+
// through Web Audio. When a viseme timing file accompanies the clip, the
|
|
5
|
+
// mouth is shaped per-viseme from the precise playback time; otherwise it
|
|
6
|
+
// falls back to the microphone-style AnalyserNode RMS → jaw mapping. One
|
|
7
|
+
// clip plays at a time; starting a new one cancels the previous.
|
|
8
|
+
|
|
9
|
+
import { buildVisemeTimeline, sampleViseme } from './visemes.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Audio-clip-driven lip sync.
 *
 * Plays one clip at a time through Web Audio and drives the avatar's mouth
 * either from a viseme timeline (when one is available and non-empty) or from
 * the clip's time-domain RMS level.
 */
export class SpeechMouth {
  /**
   * @param {object} avatar - Rig to drive. `setMouthOpen(amount)` is required;
   *   `setMouth({ open, round, wide })` is used when the avatar provides it.
   */
  constructor(avatar) {
    this.avatar = avatar;
    this.active = false;
    this.level = 0; // smoothed jaw amount 0..1 (RMS fallback)

    // RMS-fallback envelope (matches VoiceMouth).
    this.gain = 17;
    this.maxOpen = 1;
    this.gate = 0.015;
    this.attack = 35;
    this.release = 12;

    this._ctx = null; // reused across clips (browsers cap AudioContexts)
    this._audio = null;
    this._src = null;
    this._analyser = null;
    this._data = null;

    this._timeline = null; // viseme segments, or null
    this._m = { open: 0, round: 0, wide: 0 }; // smoothed viseme controls
    this._out = { open: 0, round: 0, wide: 0 }; // scratch passed to setMouth

    // Bumped by every stop(). An in-flight play() compares its captured value
    // after each await to find out whether it has been cancelled or replaced.
    this._generation = 0;
    // Rejects the pending "wait for metadata" promise, if there is one.
    this._cancelLoad = null;
  }

  /**
   * Play the clip at `url` and lip-sync to it.
   *
   * If `visemeUrl` is given, the viseme timing file is fetched in parallel and
   * drives per-viseme mouth shapes; when it is missing, unreadable, or yields
   * no segments, the amplitude fallback is used instead.
   *
   * @param {string} url - Audio clip URL.
   * @param {string} [visemeUrl] - Optional viseme timing file URL.
   * @returns {Promise<{duration: (number|null), visemes: boolean}>} Resolves
   *   once playback begins. `duration` is in seconds (null if unknown);
   *   `visemes` tells whether a viseme timeline is driving the mouth.
   * @throws {Error} If the clip fails to load or playback is refused. If this
   *   call is cancelled by stop() or replaced by a newer play(), the error's
   *   `name` is 'AbortError'.
   */
  async play(url, visemeUrl) {
    this.stop();
    const generation = this._generation;
    try {
      if (!this._ctx) this._ctx = new (window.AudioContext || window.webkitAudioContext)();
      await this._ctx.resume(); // may stay suspended without a prior user gesture
      this._assertCurrent(generation);

      // Viseme timing is fetched in parallel with audio setup; same-origin.
      const timelinePromise = visemeUrl ? this._loadTimeline(visemeUrl) : Promise.resolve(null);

      const audio = new Audio();
      audio.crossOrigin = 'anonymous';
      audio.src = url;

      const src = this._ctx.createMediaElementSource(audio);
      const analyser = this._ctx.createAnalyser();
      analyser.fftSize = 1024;
      analyser.smoothingTimeConstant = 0.4;
      src.connect(analyser);
      analyser.connect(this._ctx.destination); // route to speakers + the meter
      // Published before the next await so stop() can always tear this graph down.
      this._audio = audio;
      this._src = src;
      this._analyser = analyser;
      this._data = new Uint8Array(analyser.fftSize);

      await this._waitForMetadata(audio, url);
      this._assertCurrent(generation);
      audio.addEventListener(
        'ended',
        () => {
          // Ignore a late event from a clip that is no longer the current one.
          if (this._audio === audio) this.stop();
        },
        { once: true },
      );

      const timeline = await timelinePromise;
      this._assertCurrent(generation);
      // An empty timeline would hold the mouth shut for the whole clip, so
      // treat it like "no timeline" and let the RMS fallback run.
      this._timeline = timeline && timeline.length > 0 ? timeline : null;
      this._m.open = this._m.round = this._m.wide = 0;
      this.active = true;
      await audio.play();
      this._assertCurrent(generation);
      return {
        duration: Number.isFinite(audio.duration) ? audio.duration : null,
        visemes: this._timeline !== null,
      };
    } catch (err) {
      // Release our own half-built state, but never a newer clip's.
      if (generation === this._generation) this.stop();
      throw err;
    }
  }

  /**
   * Stop playback, cancel any play() still in progress, disconnect the audio
   * graph and return the mouth to rest. Safe to call when nothing is playing.
   */
  stop() {
    this._generation += 1;
    const cancelLoad = this._cancelLoad;
    this._cancelLoad = null;
    if (cancelLoad) cancelLoad();

    if (this._audio) {
      this._audio.pause();
      this._audio.removeAttribute('src');
    }
    this._src?.disconnect();
    this._analyser?.disconnect();
    this._audio = null;
    this._src = null;
    this._analyser = null;
    this._data = null;
    this._timeline = null;
    this._m.open = this._m.round = this._m.wide = 0;
    this.active = false;
    this.level = 0;
    this.avatar.setMouth?.({ open: 0, round: 0, wide: 0 });
    this.avatar.setMouthOpen(0);
  }

  /**
   * Advance the mouth animation; call once per rendered frame.
   *
   * @param {number} dt - Seconds since the previous frame (clamped to 0.1).
   */
  update(dt) {
    if (!this.active) return;

    // Viseme mode: shape the mouth from the precise playback time.
    if (this._timeline) {
      const t = this._audio ? this._audio.currentTime : 0;
      const target = sampleViseme(this._timeline, t);
      const k = 1 - Math.exp(-22 * Math.min(dt, 0.1)); // smooth coarticulation
      this._m.open += (target.open - this._m.open) * k;
      this._m.round += (target.round - this._m.round) * k;
      this._m.wide += (target.wide - this._m.wide) * k;
      // "Max open" caps the jaw; lip round/spread shapes stay full so visemes
      // remain legible even at a low cap.
      this._out.open = this._m.open * this.maxOpen;
      this._out.round = this._m.round;
      this._out.wide = this._m.wide;
      if (typeof this.avatar.setMouth === 'function') {
        this.avatar.setMouth(this._out);
      } else {
        // stop() already treats setMouth as optional; degrade to jaw-only.
        this.avatar.setMouthOpen(this._out.open);
      }
      return;
    }

    // Fallback: amplitude-driven jaw.
    if (!this._analyser || !this._data) return;
    this._analyser.getByteTimeDomainData(this._data);
    let sum = 0;
    for (let i = 0; i < this._data.length; i++) {
      const v = (this._data[i] - 128) / 128;
      sum += v * v;
    }
    const rms = Math.sqrt(sum / this._data.length);
    const target = Math.min(1, Math.max(0, (rms - this.gate) * this.gain));
    const k = target > this.level ? this.attack : this.release;
    this.level += (target - this.level) * (1 - Math.exp(-k * Math.min(dt, 0.1)));
    this.avatar.setMouthOpen(this.level * this.maxOpen);
  }

  // Fetch and parse the viseme timing file. Deliberately best-effort: any
  // failure yields null so the caller falls back to amplitude lip sync.
  async _loadTimeline(visemeUrl) {
    try {
      const response = await fetch(visemeUrl);
      if (!response.ok) return null;
      return buildVisemeTimeline(await response.json());
    } catch {
      return null;
    }
  }

  // Resolve when `audio` has metadata; reject on a load error or when stop()
  // cancels the wait (removing `src` fires no event, so stop() must wake us).
  _waitForMetadata(audio, url) {
    return new Promise((resolve, reject) => {
      const cleanup = () => {
        audio.removeEventListener('loadedmetadata', onLoaded);
        audio.removeEventListener('error', onError);
        if (this._cancelLoad === onCancel) this._cancelLoad = null;
      };
      const onLoaded = () => {
        cleanup();
        resolve();
      };
      const onError = () => {
        cleanup();
        reject(new Error(`failed to load audio: ${url}`));
      };
      const onCancel = () => {
        cleanup();
        reject(this._supersededError());
      };
      audio.addEventListener('loadedmetadata', onLoaded);
      audio.addEventListener('error', onError);
      this._cancelLoad = onCancel;
    });
  }

  // Throw an AbortError if stop() or a newer play() ran since `generation`.
  _assertCurrent(generation) {
    if (generation !== this._generation) throw this._supersededError();
  }

  // Error used to reject a play() that was cancelled or replaced.
  _supersededError() {
    const err = new Error('speech playback was cancelled or superseded');
    err.name = 'AbortError';
    return err;
  }
}
|
package/visemes.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
// Viseme model for speech lip-sync.
|
|
2
|
+
//
|
|
3
|
+
// xAI TTS returns per-character timing (graph_chars + graph_times). We map
|
|
4
|
+
// each grapheme to one of a small set of visemes, each a target for three
|
|
5
|
+
// mouth controls (consumed by RuthAvatar.setMouth):
|
|
6
|
+
// open — jaw drop (vowels)
|
|
7
|
+
// round — lip pucker / protrude (o, u, w)
|
|
8
|
+
// wide — lip spread (e, i)
|
|
9
|
+
//
|
|
10
|
+
// English spelling → phoneme is irregular, so this is a heuristic grapheme
|
|
11
|
+
// mapping, not a phonemizer — but with correct timing and real closures
|
|
12
|
+
// (m/b/p shut the lips) it reads far better than amplitude-only jaw motion.
|
|
13
|
+
|
|
14
|
+
// Viseme name → target mouth controls. Each row below is
// [name, open, round, wide]; the rows are expanded into
// { open, round, wide } objects keyed by name, in the order listed.
export const VISEMES = Object.fromEntries(
  [
    ['sil', 0.0, 0.0, 0.0], // silence / rest (closed)
    ['PP', 0.0, 0.0, 0.05], // p b m — lips closed
    ['FF', 0.12, 0.0, 0.25], // f v
    ['TH', 0.18, 0.0, 0.20], // th
    ['DD', 0.25, 0.0, 0.35], // generic consonant (d t n s z l c k g …)
    ['RR', 0.22, 0.35, 0.0], // r
    ['AA', 0.90, 0.0, 0.20], // a
    ['E', 0.45, 0.0, 0.60], // e
    ['I', 0.28, 0.0, 0.80], // i y
    ['O', 0.55, 0.70, 0.0], // o
    ['U', 0.32, 0.95, 0.0], // u w
  ].map(([name, open, round, wide]) => [name, { open, round, wide }]),
);
|
|
27
|
+
|
|
28
|
+
// Lower-case grapheme → viseme name. Anything not listed here is treated as
// silence by charToViseme.
const CHAR_VISEME = {
  a: 'AA', e: 'E', i: 'I', o: 'O', u: 'U', y: 'I', w: 'U',
  m: 'PP', b: 'PP', p: 'PP',
  f: 'FF', v: 'FF',
  r: 'RR',
  // every other consonant collapses to a generic open-consonant shape
  d: 'DD', t: 'DD', n: 'DD', s: 'DD', z: 'DD', l: 'DD', c: 'DD',
  k: 'DD', g: 'DD', h: 'DD', j: 'DD', q: 'DD', x: 'DD',
};

// Unicode combining diacritical marks (what NFD splits off an accented letter).
const COMBINING_MARKS = /[\u0300-\u036f]/g;

// Own-property lookup, so inherited keys such as "constructor" or "toString"
// can never be returned as if they were viseme names.
function lookupViseme(key) {
  return Object.prototype.hasOwnProperty.call(CHAR_VISEME, key) ? CHAR_VISEME[key] : undefined;
}

/**
 * Map one grapheme to a viseme name.
 *
 * Matching is case-insensitive. An accented Latin letter that has no entry of
 * its own is retried as its base letter (é → e, ü → u).
 *
 * @param {string} ch - A single grapheme from the timing data.
 * @returns {string} A key of VISEMES; 'sil' for spaces, punctuation, digits,
 *   unknown characters and non-string input.
 */
export function charToViseme(ch) {
  if (typeof ch !== 'string') return 'sil';
  const lower = ch.toLowerCase();
  const direct = lookupViseme(lower);
  if (direct !== undefined) return direct;
  const folded = lower.normalize('NFD').replace(COMBINING_MARKS, '');
  return lookupViseme(folded) ?? 'sil';
}
|
|
41
|
+
|
|
42
|
+
/**
 * Build a time-ordered list of { start, end, viseme } from xAI's
 * audio_timestamps ({ graph_chars, graph_times: [[start, end], …] }).
 *
 * Entries whose start time is missing or not a finite number are dropped. A
 * missing, non-finite or inverted end time (end < start) collapses to a
 * zero-length segment at `start`. The result is sorted by `start`, because
 * sampleViseme stops scanning at the first segment that starts after the
 * requested time; entries with equal start keep their input order.
 *
 * @param {{graph_chars?: ArrayLike<string>, graph_times?: Array<Array<number>>}|null|undefined} timestamps
 * @returns {Array<{start: number, end: number, viseme: string}>} Possibly empty.
 */
export function buildVisemeTimeline(timestamps) {
  const chars = timestamps?.graph_chars ?? [];
  const times = timestamps?.graph_times ?? [];
  const segs = [];
  for (let i = 0; i < chars.length; i++) {
    const t = times[i];
    // Number.isFinite rejects non-numbers as well as NaN / ±Infinity.
    if (!t || !Number.isFinite(t[0])) continue;
    const start = t[0];
    const end = Number.isFinite(t[1]) && t[1] >= start ? t[1] : start;
    segs.push({ start, end, viseme: charToViseme(chars[i]) });
  }
  segs.sort((a, b) => a.start - b.start);
  return segs;
}
|
|
55
|
+
|
|
56
|
+
// Look up the mouth-control target for playback time `t` (seconds).
//
// Walks the timeline in order and keeps the last segment whose start is not
// after `t`. That segment's shape is held until 0.12 s past its end; before
// the first segment, after that grace period, or for an empty/missing
// timeline, the neutral `sil` shape is returned.
export function sampleViseme(timeline, t) {
  const HOLD_AFTER_END = 0.12;

  if (!timeline || timeline.length === 0) {
    return VISEMES.sil;
  }

  let current = null;
  for (const candidate of timeline) {
    if (!(candidate.start <= t)) {
      break;
    }
    current = candidate;
  }

  if (!current) {
    return VISEMES.sil;
  }
  if (t > current.end + HOLD_AFTER_END) {
    return VISEMES.sil;
  }
  return VISEMES[current.viseme] ?? VISEMES.sil;
}
|
package/voice.js
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
// Microphone-driven lip sync.
|
|
2
|
+
//
|
|
3
|
+
// Taps the mic through a Web Audio AnalyserNode, measures the time-domain
|
|
4
|
+
// RMS each frame, gates out background noise, and maps the result to a jaw
|
|
5
|
+
// open amount with a fast attack / slower release envelope so the mouth
|
|
6
|
+
// tracks speech without chattering. The smoothed level is pushed to
|
|
7
|
+
// RuthAvatar.setMouthOpen every frame.
|
|
8
|
+
|
|
9
|
+
/**
 * Microphone-driven lip sync.
 *
 * Measures the microphone's time-domain RMS each frame and maps it, through a
 * noise gate and an attack/release envelope, to a jaw-open amount that is
 * pushed to `avatar.setMouthOpen`.
 */
export class VoiceMouth {
  /**
   * @param {object} avatar - Rig to drive; must expose `setMouthOpen(amount)`.
   */
  constructor(avatar) {
    this.avatar = avatar;
    this.active = false;
    this.level = 0; // smoothed jaw amount 0..1 (also the meter value)

    this.gain = 17; // rms → open scale (Sensitivity slider)
    this.maxOpen = 1; // jaw cap 0..1 (Max open slider)
    this.gate = 0.015; // noise floor below which the mouth stays shut
    this.attack = 35; // 1/s — open quickly
    this.release = 12; // 1/s — close more slowly

    this._stream = null;
    this._ctx = null;
    this._src = null; // kept so stop() can disconnect the source node
    this._analyser = null;
    this._data = null;

    this._pending = null; // promise of a start() still in progress
    this._generation = 0; // bumped by stop() to cancel a start() in progress
  }

  /**
   * Ask for the microphone and begin metering it.
   *
   * Does nothing if already active. Concurrent calls share one request, so
   * only one stream is ever opened. If stop() is called before the request
   * completes, the stream is released again and the instance stays inactive.
   *
   * @returns {Promise<void>}
   * @throws {Error} If the browser has no getUserMedia, or the request fails
   *   (for example, permission denied).
   */
  async start() {
    if (this.active) return;
    if (this._pending) {
      await this._pending;
      return;
    }
    if (!navigator.mediaDevices?.getUserMedia) {
      throw new Error('microphone not available in this browser');
    }
    const pending = this._acquire(this._generation);
    this._pending = pending;
    try {
      await pending;
    } finally {
      if (this._pending === pending) this._pending = null;
    }
  }

  /**
   * Release the microphone and audio context, cancel any start() still in
   * progress, and close the mouth. Safe to call when not active.
   */
  stop() {
    this._generation += 1;
    this._pending = null;
    this._src?.disconnect();
    VoiceMouth._release(this._stream, this._ctx);
    this._stream = null;
    this._ctx = null;
    this._src = null;
    this._analyser = null;
    this._data = null;
    this.active = false;
    this.level = 0;
    this.avatar.setMouthOpen(0);
  }

  /**
   * Sample the microphone level and update the jaw; call once per frame.
   *
   * @param {number} dt - Seconds since the previous frame (clamped to 0.1).
   */
  update(dt) {
    if (!this.active || !this._analyser) return;
    this._analyser.getByteTimeDomainData(this._data);

    let sum = 0;
    for (let i = 0; i < this._data.length; i++) {
      const v = (this._data[i] - 128) / 128;
      sum += v * v;
    }
    const rms = Math.sqrt(sum / this._data.length);

    const target = Math.min(1, Math.max(0, (rms - this.gate) * this.gain));
    const k = target > this.level ? this.attack : this.release;
    this.level += (target - this.level) * (1 - Math.exp(-k * Math.min(dt, 0.1)));

    this.avatar.setMouthOpen(this.level * this.maxOpen);
  }

  // Open the mic and build the analyser graph. State is only published on the
  // instance once everything succeeded and stop() has not intervened;
  // otherwise whatever was acquired so far is released again.
  async _acquire(generation) {
    let stream = null;
    let ctx = null;
    let committed = false;
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: { echoCancellation: true, noiseSuppression: true, autoGainControl: false },
      });
      if (generation !== this._generation) return;

      ctx = new (window.AudioContext || window.webkitAudioContext)();
      await ctx.resume();
      if (generation !== this._generation) return;

      const src = ctx.createMediaStreamSource(stream);
      const analyser = ctx.createAnalyser();
      analyser.fftSize = 1024;
      analyser.smoothingTimeConstant = 0.4;
      src.connect(analyser);

      this._stream = stream;
      this._ctx = ctx;
      this._src = src;
      this._analyser = analyser;
      this._data = new Uint8Array(analyser.fftSize);
      this.active = true;
      committed = true;
    } finally {
      if (!committed) VoiceMouth._release(stream, ctx);
    }
  }

  // Stop every track of `stream` and close `ctx`; either may be null.
  static _release(stream, ctx) {
    if (stream) {
      for (const track of stream.getTracks()) track.stop();
    }
    if (ctx) {
      // Best-effort teardown: a failed close must not surface as an unhandled
      // rejection from stop().
      Promise.resolve(ctx.close()).catch(() => {});
    }
  }
}
|