npm - talking-head-studio - Versions diffs - 0.4.2 → 0.4.4 - Mend

talking-head-studio 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/TalkingHead.d.ts CHANGED Viewed

@@ -25,11 +25,17 @@ export interface TalkingHeadVisemeSchedule {
     /** Matches X-TTS-Request-Id / agent_visemes.requestId */
     requestId?: string;
     /**
-     * Wall-clock ms at which audio playback started.
-     * Anchor this to the moment you observe agent_state: speaking.
-     * Used to skip cues that are already in the past on late delivery.
+     * Wall-clock ms at which the TTS request was fired (agent side).
+     * Fallback scheduling anchor: when audioStartedAtMs is not provided, the
+     * anchor is startedAtMs + AUDIO_PIPELINE_DELAY_MS.
      */
     startedAtMs?: number;
+    /**
+     * Wall-clock ms at which audio actually began playing in the speaker.
+     * When present, used directly as the scheduling anchor with no additional
+     * pipeline offset — more accurate than startedAtMs on fast connections.
+     * Stamp this from the LiveKit onAudioPlaybackStarted callback if available.
+     */
+    audioStartedAtMs?: number;
     durationMs?: number;
     cues: TalkingHeadVisemeCue[];
 }

package/dist/html.js CHANGED Viewed

@@ -215,7 +215,7 @@ async function loadStaticFallback(loadedAvatarUrl) {
       renderer.setAnimationLoop(() => {
         const delta = clock.getDelta();
         if (staticMixer) staticMixer.update(delta);
-        tickVisemeDecay();
+        tickVisemeDecay(delta);
         applyMotionBones();
         controls.update();
         renderer.render(scene, camera);
@@ -322,15 +322,15 @@ async function init() {
             }
           };
           const headaudioUpdate = headaudio.update.bind(headaudio);
-          head.opt.update = (dt) => { headaudioUpdate(dt); tickVisemeDecay(); applyMotionBones(); };
+          head.opt.update = (dt) => { headaudioUpdate(dt); tickVisemeDecay(dt); applyMotionBones(); };
           log('HeadAudio ready (phoneme lip sync)');
         } else {
           log('HeadAudio skipped: AudioWorklet not supported in this WebView. Use sendViseme() from native TTS callbacks.');
-          head.opt.update = () => { tickVisemeDecay(); applyMotionBones(); };
+          head.opt.update = (dt) => { tickVisemeDecay(dt); applyMotionBones(); };
         }
       } catch (err) {
         log('HeadAudio unavailable, viseme/amplitude fallback active: ' + err.message);
-        head.opt.update = () => { tickVisemeDecay(); applyMotionBones(); };
+        head.opt.update = (dt) => { tickVisemeDecay(dt); applyMotionBones(); };
       }
       startAudioInterception();
@@ -551,7 +551,7 @@ function clearScheduledVisemes() {
   for (const key of Object.keys(visemeState)) visemeState[key] = 0;
 }
-function tickVisemeDecay() {
+function tickVisemeDecay(deltaSeconds?: number) {
   if (!visemeMorphCache) return;
   const isScheduled = Date.now() < visemeModeUntil;
@@ -566,7 +566,12 @@ function tickVisemeDecay() {
     // Only decay if we aren't in the middle of a viseme schedule.
     // Scheduled visemes are cleared manually by timeouts.
     if (!isScheduled) {
-      const decayed = weight * 0.82;
+      // Time-delta-aware decay: maintain consistent feel regardless of frame rate.
+      // Base rate is calibrated for 60 fps (0.82 per frame = ~12 frames to 10%).
+      // pow(0.82, delta*60) is frame-rate independent.
+      const dt = deltaSeconds ?? (1 / 60);
+      const decayFactor = Math.pow(0.82, dt * 60);
+      const decayed = weight * decayFactor;
       visemeState[key] = decayed < 0.01 ? 0 : decayed;
     }
@@ -609,12 +614,17 @@ function scheduleVisemes(schedule) {
   if (!schedule || !Array.isArray(schedule.cues) || schedule.cues.length === 0) return;
   const myScheduleId = activeVisemeScheduleId;
-  // The startedAtMs anchor is set when tts_request_start arrives on the data
-  // channel. Audio doesn't play until ~300ms later (LiveKit audio buffering).
-  // TTS generation delay is no longer included here since visemes now arrive
-  // via direct ref call before the React render cycle.
-  const AUDIO_PIPELINE_DELAY_MS = 300;
-  let startedAt = (schedule.startedAtMs || Date.now()) + AUDIO_PIPELINE_DELAY_MS;
+  // Anchor selection priority:
+  //   1. audioStartedAtMs — stamped when audio actually begins playing (most accurate)
+  //   2. startedAtMs + pipeline delay — stamped at TTS request fire time
+  //
+  // AUDIO_PIPELINE_DELAY_MS compensates for the gap between "TTS request fired"
+  // and "audio audible from speaker". Qwen3-TTS on local/tailnet is ~80–150 ms;
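+  // LiveKit adds ~50–80 ms of jitter buffer on top. This comment describes
+  // 150 ms as "conservative but avoids the mouth running ahead of audio on
+  // fast connections", yet the constant below is set to 50 ms.
+  // NOTE(review): comment and constant disagree — confirm which value is intended.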
+  const AUDIO_PIPELINE_DELAY_MS = 50;
+  let startedAt = schedule.audioStartedAtMs
+    ?? ((schedule.startedAtMs || Date.now()) + AUDIO_PIPELINE_DELAY_MS);
   const durationMs = schedule.durationMs || 0;
   const now = Date.now();
   let elapsedMs = Math.max(0, now - startedAt);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "talking-head-studio",
-  "version": "0.4.2",
+  "version": "0.4.4",
   "description": "Cross-platform 3D avatar component for React Native & web — lip-sync, gestures, accessories, and LLM integration. Powered by TalkingHead + Three.js.",
   "main": "dist/index.web.js",
   "browser": "dist/index.web.js",