talking-head-studio 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TalkingHead.d.ts +9 -3
- package/dist/html.js +22 -12
- package/package.json +1 -1
package/dist/TalkingHead.d.ts
CHANGED
|
@@ -25,11 +25,17 @@ export interface TalkingHeadVisemeSchedule {
|
|
|
25
25
|
/** Matches X-TTS-Request-Id / agent_visemes.requestId */
|
|
26
26
|
requestId?: string;
|
|
27
27
|
/**
|
|
28
|
-
* Wall-clock ms at which
|
|
29
|
-
*
|
|
30
|
-
* Used to skip cues that are already in the past on late delivery.
|
|
28
|
+
* Wall-clock ms at which the TTS request was fired (agent side).
|
|
29
|
+
* Used as the scheduling anchor plus AUDIO_PIPELINE_DELAY_MS.
|
|
31
30
|
*/
|
|
32
31
|
startedAtMs?: number;
|
|
32
|
+
/**
|
|
33
|
+
* Wall-clock ms at which audio actually began playing in the speaker.
|
|
34
|
+
* When present, used directly as the scheduling anchor with no additional
|
|
35
|
+
* pipeline offset — more accurate than startedAtMs on fast connections.
|
|
36
|
+
* Stamp this from the LiveKit onAudioPlaybackStarted callback if available.
|
|
37
|
+
*/
|
|
38
|
+
audioStartedAtMs?: number;
|
|
33
39
|
durationMs?: number;
|
|
34
40
|
cues: TalkingHeadVisemeCue[];
|
|
35
41
|
}
|
package/dist/html.js
CHANGED
|
@@ -215,7 +215,7 @@ async function loadStaticFallback(loadedAvatarUrl) {
|
|
|
215
215
|
renderer.setAnimationLoop(() => {
|
|
216
216
|
const delta = clock.getDelta();
|
|
217
217
|
if (staticMixer) staticMixer.update(delta);
|
|
218
|
-
tickVisemeDecay();
|
|
218
|
+
tickVisemeDecay(delta);
|
|
219
219
|
applyMotionBones();
|
|
220
220
|
controls.update();
|
|
221
221
|
renderer.render(scene, camera);
|
|
@@ -322,15 +322,15 @@ async function init() {
|
|
|
322
322
|
}
|
|
323
323
|
};
|
|
324
324
|
const headaudioUpdate = headaudio.update.bind(headaudio);
|
|
325
|
-
head.opt.update = (dt) => { headaudioUpdate(dt); tickVisemeDecay(); applyMotionBones(); };
|
|
325
|
+
head.opt.update = (dt) => { headaudioUpdate(dt); tickVisemeDecay(dt); applyMotionBones(); };
|
|
326
326
|
log('HeadAudio ready (phoneme lip sync)');
|
|
327
327
|
} else {
|
|
328
328
|
log('HeadAudio skipped: AudioWorklet not supported in this WebView. Use sendViseme() from native TTS callbacks.');
|
|
329
|
-
head.opt.update = () => { tickVisemeDecay(); applyMotionBones(); };
|
|
329
|
+
head.opt.update = (dt) => { tickVisemeDecay(dt); applyMotionBones(); };
|
|
330
330
|
}
|
|
331
331
|
} catch (err) {
|
|
332
332
|
log('HeadAudio unavailable, viseme/amplitude fallback active: ' + err.message);
|
|
333
|
-
head.opt.update = () => { tickVisemeDecay(); applyMotionBones(); };
|
|
333
|
+
head.opt.update = (dt) => { tickVisemeDecay(dt); applyMotionBones(); };
|
|
334
334
|
}
|
|
335
335
|
|
|
336
336
|
startAudioInterception();
|
|
@@ -551,7 +551,7 @@ function clearScheduledVisemes() {
|
|
|
551
551
|
for (const key of Object.keys(visemeState)) visemeState[key] = 0;
|
|
552
552
|
}
|
|
553
553
|
|
|
554
|
-
function tickVisemeDecay() {
|
|
554
|
+
function tickVisemeDecay(deltaSeconds?: number) {
|
|
555
555
|
if (!visemeMorphCache) return;
|
|
556
556
|
|
|
557
557
|
const isScheduled = Date.now() < visemeModeUntil;
|
|
@@ -566,7 +566,12 @@ function tickVisemeDecay() {
|
|
|
566
566
|
// Only decay if we aren't in the middle of a viseme schedule.
|
|
567
567
|
// Scheduled visemes are cleared manually by timeouts.
|
|
568
568
|
if (!isScheduled) {
|
|
569
|
-
|
|
569
|
+
// Time-delta-aware decay: maintain consistent feel regardless of frame rate.
|
|
570
|
+
// Base rate is calibrated for 60 fps (0.82 per frame = ~12 frames to 10%).
|
|
571
|
+
// pow(0.82, delta*60) is frame-rate independent.
|
|
572
|
+
const dt = deltaSeconds ?? (1 / 60);
|
|
573
|
+
const decayFactor = Math.pow(0.82, dt * 60);
|
|
574
|
+
const decayed = weight * decayFactor;
|
|
570
575
|
visemeState[key] = decayed < 0.01 ? 0 : decayed;
|
|
571
576
|
}
|
|
572
577
|
|
|
@@ -609,12 +614,17 @@ function scheduleVisemes(schedule) {
|
|
|
609
614
|
if (!schedule || !Array.isArray(schedule.cues) || schedule.cues.length === 0) return;
|
|
610
615
|
|
|
611
616
|
const myScheduleId = activeVisemeScheduleId;
|
|
612
|
-
//
|
|
613
|
-
//
|
|
614
|
-
//
|
|
615
|
-
//
|
|
616
|
-
|
|
617
|
-
|
|
617
|
+
// Anchor selection priority:
|
|
618
|
+
// 1. audioStartedAtMs — stamped when audio actually begins playing (most accurate)
|
|
619
|
+
// 2. startedAtMs + pipeline delay — stamped at TTS request fire time
|
|
620
|
+
//
|
|
621
|
+
// AUDIO_PIPELINE_DELAY_MS compensates for the gap between "TTS request fired"
|
|
622
|
+
// and "audio audible from speaker". Qwen3-TTS on local/tailnet is ~80–150 ms;
|
|
623
|
+
// LiveKit adds ~50–80 ms of jitter buffer on top. 150 ms is conservative but
|
|
624
|
+
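// avoids the mouth running ahead of audio on fast connections. NOTE(review): the constant below is 50, not the 150 ms described here — confirm which value is intended.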
|
|
625
|
+
const AUDIO_PIPELINE_DELAY_MS = 50;
|
|
626
|
+
let startedAt = schedule.audioStartedAtMs
|
|
627
|
+
?? ((schedule.startedAtMs || Date.now()) + AUDIO_PIPELINE_DELAY_MS);
|
|
618
628
|
const durationMs = schedule.durationMs || 0;
|
|
619
629
|
const now = Date.now();
|
|
620
630
|
let elapsedMs = Math.max(0, now - startedAt);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "talking-head-studio",
|
|
3
|
-
"version": "0.4.2",
|
|
3
|
+
"version": "0.4.4",
|
|
4
4
|
"description": "Cross-platform 3D avatar component for React Native & web — lip-sync, gestures, accessories, and LLM integration. Powered by TalkingHead + Three.js.",
|
|
5
5
|
"main": "dist/index.web.js",
|
|
6
6
|
"browser": "dist/index.web.js",
|