talking-head-studio 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,11 +25,17 @@ export interface TalkingHeadVisemeSchedule {
25
25
  /** Matches X-TTS-Request-Id / agent_visemes.requestId */
26
26
  requestId?: string;
27
27
  /**
28
- * Wall-clock ms at which audio playback started.
29
- * Anchor this to the moment you observe agent_state: speaking.
30
- * Used to skip cues that are already in the past on late delivery.
28
+ * Wall-clock ms at which the TTS request was fired (agent side).
29
+ * When audioStartedAtMs is absent, the scheduling anchor is this value plus AUDIO_PIPELINE_DELAY_MS.
31
30
  */
32
31
  startedAtMs?: number;
32
+ /**
33
+ * Wall-clock ms at which audio actually began playing in the speaker.
34
+ * When present, used directly as the scheduling anchor with no additional
35
+ * pipeline offset — more accurate than startedAtMs on fast connections.
36
+ * Stamp this from the LiveKit onAudioPlaybackStarted callback if available.
37
+ */
38
+ audioStartedAtMs?: number;
33
39
  durationMs?: number;
34
40
  cues: TalkingHeadVisemeCue[];
35
41
  }
package/dist/html.js CHANGED
@@ -215,7 +215,7 @@ async function loadStaticFallback(loadedAvatarUrl) {
215
215
  renderer.setAnimationLoop(() => {
216
216
  const delta = clock.getDelta();
217
217
  if (staticMixer) staticMixer.update(delta);
218
- tickVisemeDecay();
218
+ tickVisemeDecay(delta);
219
219
  applyMotionBones();
220
220
  controls.update();
221
221
  renderer.render(scene, camera);
@@ -322,15 +322,15 @@ async function init() {
322
322
  }
323
323
  };
324
324
  const headaudioUpdate = headaudio.update.bind(headaudio);
325
- head.opt.update = (dt) => { headaudioUpdate(dt); tickVisemeDecay(); applyMotionBones(); };
325
+ head.opt.update = (dt) => { headaudioUpdate(dt); tickVisemeDecay(dt); applyMotionBones(); };
326
326
  log('HeadAudio ready (phoneme lip sync)');
327
327
  } else {
328
328
  log('HeadAudio skipped: AudioWorklet not supported in this WebView. Use sendViseme() from native TTS callbacks.');
329
- head.opt.update = () => { tickVisemeDecay(); applyMotionBones(); };
329
+ head.opt.update = (dt) => { tickVisemeDecay(dt); applyMotionBones(); };
330
330
  }
331
331
  } catch (err) {
332
332
  log('HeadAudio unavailable, viseme/amplitude fallback active: ' + err.message);
333
- head.opt.update = () => { tickVisemeDecay(); applyMotionBones(); };
333
+ head.opt.update = (dt) => { tickVisemeDecay(dt); applyMotionBones(); };
334
334
  }
335
335
 
336
336
  startAudioInterception();
@@ -551,7 +551,7 @@ function clearScheduledVisemes() {
551
551
  for (const key of Object.keys(visemeState)) visemeState[key] = 0;
552
552
  }
553
553
 
554
- function tickVisemeDecay() {
554
+ function tickVisemeDecay(deltaSeconds?: number) {
555
555
  if (!visemeMorphCache) return;
556
556
 
557
557
  const isScheduled = Date.now() < visemeModeUntil;
@@ -566,7 +566,12 @@ function tickVisemeDecay() {
566
566
  // Only decay if we aren't in the middle of a viseme schedule.
567
567
  // Scheduled visemes are cleared manually by timeouts.
568
568
  if (!isScheduled) {
569
- const decayed = weight * 0.82;
569
+ // Time-delta-aware decay: maintain consistent feel regardless of frame rate.
570
+ // Base rate is calibrated for 60 fps (0.82 per frame = ~12 frames to 10%).
571
+ // pow(0.82, delta*60) is frame-rate independent.
572
+ const dt = deltaSeconds ?? (1 / 60);
573
+ const decayFactor = Math.pow(0.82, dt * 60);
574
+ const decayed = weight * decayFactor;
570
575
  visemeState[key] = decayed < 0.01 ? 0 : decayed;
571
576
  }
572
577
 
@@ -609,12 +614,17 @@ function scheduleVisemes(schedule) {
609
614
  if (!schedule || !Array.isArray(schedule.cues) || schedule.cues.length === 0) return;
610
615
 
611
616
  const myScheduleId = activeVisemeScheduleId;
612
- // The startedAtMs anchor is set when tts_request_start arrives on the data
613
- // channel. Audio doesn't play until ~300ms later (LiveKit audio buffering).
614
- // TTS generation delay is no longer included here since visemes now arrive
615
- // via direct ref call before the React render cycle.
616
- const AUDIO_PIPELINE_DELAY_MS = 300;
617
- let startedAt = (schedule.startedAtMs || Date.now()) + AUDIO_PIPELINE_DELAY_MS;
617
+ // Anchor selection priority:
618
+ // 1. audioStartedAtMs stamped when audio actually begins playing (most accurate)
619
+ // 2. startedAtMs + pipeline delay stamped at TTS request fire time
620
+ //
621
+ // AUDIO_PIPELINE_DELAY_MS compensates for the gap between "TTS request fired"
622
+ // and "audio audible from speaker". Qwen3-TTS on local/tailnet is ~80–150 ms;
623
+ // LiveKit adds ~50–80 ms of jitter buffer on top. A 150 ms delay would be conservative and
624
+ // avoid the mouth running ahead of audio. NOTE(review): the constant below is 50 — confirm the intended value.
625
+ const AUDIO_PIPELINE_DELAY_MS = 50;
626
+ let startedAt = schedule.audioStartedAtMs
627
+ ?? ((schedule.startedAtMs || Date.now()) + AUDIO_PIPELINE_DELAY_MS);
618
628
  const durationMs = schedule.durationMs || 0;
619
629
  const now = Date.now();
620
630
  let elapsedMs = Math.max(0, now - startedAt);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "talking-head-studio",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "Cross-platform 3D avatar component for React Native & web — lip-sync, gestures, accessories, and LLM integration. Powered by TalkingHead + Three.js.",
5
5
  "main": "dist/index.web.js",
6
6
  "browser": "dist/index.web.js",