npm - @sage-rsc/talking-head-react - Versions diffs - 1.7.6 → 1.7.8 - Mend

@sage-rsc/talking-head-react 1.7.6 → 1.7.8

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (6)

package/dist/index.cjs +6 -6
package/dist/index.js +2158 -2038
package/package.json +1 -1
package/src/components/SimpleTalkingAvatar.jsx +95 -49
package/src/lib/talkinghead.mjs +193 -9
package/src/utils/animationLoader.js +24 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sage-rsc/talking-head-react",
-  "version": "1.7.6",
+  "version": "1.7.8",
   "description": "A reusable React component for 3D talking avatars with lip-sync and text-to-speech",
   "main": "./dist/index.cjs",
   "module": "./dist/index.js",

package/src/components/SimpleTalkingAvatar.jsx CHANGED Viewed

@@ -81,6 +81,8 @@ const SimpleTalkingAvatar = forwardRef(({
   const currentAnimationGroupRef = useRef(null);
   const playedAnimationsRef = useRef([]); // Track animations played during current speech
   const animationQueueRef = useRef([]); // Queue of animations to play in order
+  const currentSentenceIndexRef = useRef(0); // Track which sentence is currently playing
+  const pausedAudioDataRef = useRef(null); // Store trimmed audio buffer when paused
   // Keep ref in sync with state
   useEffect(() => {
@@ -118,11 +120,12 @@ const SimpleTalkingAvatar = forwardRef(({
       // Option 1: Load from manifest file only
       if (animations.manifest && !animations.auto) {
-        try {
-          const manifestAnimations = await loadAnimationsFromManifest(animations.manifest);
+        const manifestAnimations = await loadAnimationsFromManifest(animations.manifest);
+        // loadAnimationsFromManifest returns {} on error, so check if we got anything
+        const hasAnimations = Object.keys(manifestAnimations).length > 0;
+        if (hasAnimations) {
           setLoadedAnimations(manifestAnimations);
-        } catch (error) {
-          console.error('Failed to load animation manifest:', error);
+        } else {
           setLoadedAnimations(animations);
         }
       }
@@ -155,7 +158,7 @@ const SimpleTalkingAvatar = forwardRef(({
             folderPaths[`shared_talking`] = `${basePath}/shared/talking`;
             folderPaths[`shared_idle`] = `${basePath}/shared/idle`;
-            console.log(`📁 Loading animations from folders with genderKey="${genderKey}" for avatarBody="${avatarBody}"`);
+            // Loading animations from folders
             const discoveredAnimations = await autoLoadAnimationsFromFolders(folderPaths, avatarBody);
             // Check if we found any animations
@@ -406,33 +409,30 @@ const SimpleTalkingAvatar = forwardRef(({
     if (loadedAnimations._genderSpecific) {
       const genderKey = getGenderKey(avatarBody);
-      // Debug: Log gender detection
-      console.log(`🔍 Gender detection: avatarBody="${avatarBody}" -> genderKey="${genderKey}"`);
       const genderGroups = loadedAnimations._genderSpecific[genderKey];
       // Try gender-specific first
       if (genderGroups && genderGroups[groupName]) {
         group = genderGroups[groupName];
-        console.log(`✅ Found ${genderKey} animations for "${groupName}": ${Array.isArray(group) ? group.length : 1} animation(s)`);
       }
       // Fallback to shared gender-specific animations
       else if (loadedAnimations._genderSpecific.shared && loadedAnimations._genderSpecific.shared[groupName]) {
         group = loadedAnimations._genderSpecific.shared[groupName];
-        console.log(`✅ Found shared animations for "${groupName}": ${Array.isArray(group) ? group.length : 1} animation(s)`);
-      } else {
-        console.log(`⚠️ No ${genderKey} or shared animations found for "${groupName}"`);
       }
     }
     // Fallback to root-level animations if gender-specific not found
     if (!group && loadedAnimations[groupName]) {
       group = loadedAnimations[groupName];
-      console.log(`✅ Found root-level animations for "${groupName}": ${Array.isArray(group) ? group.length : 1} animation(s)`);
     }
     if (!group) {
-      console.log(`❌ No animations found for "${groupName}"`);
+      // Only log warning if animations were actually configured (not just empty object)
+      const hasAnyAnimations = Object.keys(loadedAnimations).length > 0 ||
+                               (loadedAnimations._genderSpecific && Object.keys(loadedAnimations._genderSpecific).length > 0);
+      if (hasAnyAnimations) {
+        console.warn(`⚠️ No animations found for group "${groupName}". Make sure animations are configured correctly.`);
+      }
       return [];
     }
@@ -614,6 +614,7 @@ const SimpleTalkingAvatar = forwardRef(({
     // Split text into sentences for tracking
     const sentences = textToSpeak.split(/[.!?]+/).filter(s => s.trim().length > 0);
     originalSentencesRef.current = sentences;
+    currentSentenceIndexRef.current = 0; // Reset sentence tracking
     const speakOptions = {
       lipsyncLang: options.lipsyncLang || 'en',
@@ -693,8 +694,8 @@ const SimpleTalkingAvatar = forwardRef(({
     try {
       // Check if currently speaking
       const isSpeaking = talkingHeadRef.current.isSpeaking || false;
-      const audioPlaylist = talkingHeadRef.current.audioPlaylist || [];
-      const speechQueue = talkingHeadRef.current.speechQueue || [];
+      const audioPlaylist = [...(talkingHeadRef.current.audioPlaylist || [])]; // Copy before pausing
+      const speechQueue = [...(talkingHeadRef.current.speechQueue || [])]; // Copy before clearing
       if (isSpeaking || audioPlaylist.length > 0 || speechQueue.length > 0) {
         // Clear speech end interval
@@ -703,51 +704,84 @@ const SimpleTalkingAvatar = forwardRef(({
           speechEndIntervalRef.current = null;
         }
-        // Extract remaining text from speech queue (not yet sent to TTS)
-        let remainingText = '';
-        if (speechQueue.length > 0) {
-          remainingText = speechQueue.map(item => {
-            if (item.text && Array.isArray(item.text)) {
-              return item.text.map(wordObj => wordObj.word).join(' ');
-            }
-            return item.text || '';
-          }).join(' ');
-        }
-        // Extract text from audio playlist (currently playing or queued audio)
-        // This includes the currently playing sentence if it was interrupted
-        let audioPlaylistText = '';
-        if (audioPlaylist.length > 0) {
-          audioPlaylistText = audioPlaylist
-            .map(item => {
-              // Try to get text from the audio item
+        // IMPORTANT: Extract text BEFORE calling pauseSpeaking(), which clears audioPlaylist
+        // Track which sentences need to be re-spoken
+        const sentences = originalSentencesRef.current;
+        let remainingSentences = [];
+        // Check if audio is currently playing (item already shifted from playlist)
+        const isAudioCurrentlyPlaying = talkingHeadRef.current.isAudioPlaying || false;
+        if (sentences.length > 0) {
+          // Calculate which sentence is currently playing
+          // Total sentences = sentences.length
+          // Queued in audioPlaylist = audioPlaylist.length (already processed by TTS, waiting to play)
+          // Queued in speechQueue = speechQueue.length (not yet sent to TTS)
+          // Currently playing = 1 (if isAudioPlaying is true)
+          const queuedCount = audioPlaylist.length + speechQueue.length;
+          const currentlyPlayingCount = isAudioCurrentlyPlaying ? 1 : 0;
+          const processedCount = sentences.length - queuedCount - currentlyPlayingCount;
+          // If audio is currently playing, we're mid-sentence - restart from current sentence
+          // Otherwise, continue from next sentence
+          const startIndex = isAudioCurrentlyPlaying ? processedCount : processedCount + currentlyPlayingCount;
+          if (startIndex < sentences.length) {
+            remainingSentences = sentences.slice(startIndex);
+          }
+        } else {
+          // Fallback: Extract text from queues if we don't have original sentences
+          // Extract text from audio playlist (queued audio, not yet playing)
+          if (audioPlaylist.length > 0) {
+            audioPlaylist.forEach(item => {
               if (item.text) {
                 if (Array.isArray(item.text)) {
-                  return item.text.map(wordObj => wordObj.word).join(' ');
+                  const sentenceText = item.text.map(wordObj => wordObj.word).join(' ');
+                  if (sentenceText.trim()) {
+                    remainingSentences.push(sentenceText);
+                  }
+                } else if (item.text.trim()) {
+                  remainingSentences.push(item.text);
+                }
+              }
+            });
+          }
+          // Extract remaining text from speech queue (not yet sent to TTS)
+          if (speechQueue.length > 0) {
+            speechQueue.forEach(item => {
+              if (item.text) {
+                if (Array.isArray(item.text)) {
+                  const sentenceText = item.text.map(wordObj => wordObj.word).join(' ');
+                  if (sentenceText.trim()) {
+                    remainingSentences.push(sentenceText);
+                  }
+                } else if (item.text.trim()) {
+                  remainingSentences.push(item.text);
                 }
-                return item.text;
               }
-              return '';
-            })
-            .filter(text => text.trim().length > 0)
-            .join(' ');
+            });
+          }
         }
-        // Combine: if audio is playing, include that text first, then remaining queue text
-        const combinedRemainingText = audioPlaylistText
-          ? (audioPlaylistText + (remainingText ? ' ' + remainingText : ''))
-          : remainingText;
+        // Combine remaining sentences
+        const combinedRemainingText = remainingSentences.join(' ');
         // Store progress for resume
         speechProgressRef.current = {
           remainingText: combinedRemainingText || null,
           originalText: pausedSpeechRef.current?.text || null,
-          options: pausedSpeechRef.current?.options || null
+          options: pausedSpeechRef.current?.options || null,
+          // Track if we're pausing mid-sentence (has currently playing audio)
+          isMidSentence: audioPlaylist.length > 0
         };
-        // Clear speech queue and pause
+        // Clear speech queue and pause (this will stop audio and clear audioPlaylist)
+        // pauseSpeaking() now returns trimmed audio data if audio was playing
         talkingHeadRef.current.speechQueue.length = 0;
-        talkingHeadRef.current.pauseSpeaking();
+        const pausedAudioData = talkingHeadRef.current.pauseSpeaking();
+        pausedAudioDataRef.current = pausedAudioData; // Store trimmed buffer for exact resume
         setIsPaused(true);
         isPausedRef.current = true;
       }
@@ -767,7 +801,15 @@ const SimpleTalkingAvatar = forwardRef(({
       setIsPaused(false);
       isPausedRef.current = false;
-      // Determine what text to speak
+      // If we have trimmed audio data from pause, resume from exact position
+      if (pausedAudioDataRef.current && pausedAudioDataRef.current.audio) {
+        // Resume with trimmed buffer (exact position)
+        await talkingHeadRef.current.playAudio(false, pausedAudioDataRef.current);
+        pausedAudioDataRef.current = null; // Clear after use
+        return;
+      }
+      // Otherwise, resume from remaining text (fallback)
       const remainingText = speechProgressRef.current?.remainingText;
       const originalText = speechProgressRef.current?.originalText || pausedSpeechRef.current?.text;
       const originalOptions = speechProgressRef.current?.options || pausedSpeechRef.current?.options || {};
@@ -777,10 +819,14 @@ const SimpleTalkingAvatar = forwardRef(({
       if (textToSpeak) {
         speakText(textToSpeak, originalOptions);
       }
+      // Clear paused audio data
+      pausedAudioDataRef.current = null;
     } catch (err) {
       console.warn('Error resuming speech:', err);
       setIsPaused(false);
       isPausedRef.current = false;
+      pausedAudioDataRef.current = null;
     }
   }, [isPaused, speakText, resumeAudioContext]);

package/src/lib/talkinghead.mjs CHANGED Viewed

@@ -835,6 +835,11 @@ class TalkingHead {
     this.speechQueue = [];
     this.isSpeaking = false;
     this.isListening = false;
+    // Pause/resume tracking for buffer trimming
+    this.audioStartTime = null; // When current audio started playing
+    this.currentAudioItem = null; // Current audio item being played
+    this.pausedAudioData = null; // Stored trimmed buffer when paused
     // Setup Google text-to-speech
     if ( this.opt.ttsEndpoint ) {
@@ -3718,10 +3723,78 @@ class TalkingHead {
   /**
   * Play audio playlist using Web Audio API.
   * @param {boolean} [force=false] If true, forces to proceed
+  * @param {Object} [pausedAudioData=null] Trimmed audio data from pause to resume from exact position
   */
-  async playAudio(force=false) {
+  async playAudio(force=false, pausedAudioData=null) {
     if ( !this.armature || (this.isAudioPlaying && !force) ) return;
     this.isAudioPlaying = true;
+    // If we have paused audio data, play that first (resume from exact position)
+    if (pausedAudioData && pausedAudioData.audio) {
+      const item = {
+        audio: pausedAudioData.audio,
+        anim: pausedAudioData.anim,
+        text: pausedAudioData.text,
+        delay: pausedAudioData.delay || 0,
+        isRaw: false
+      };
+      // If Web Audio API is suspended, try to resume it
+      if ( this.audioCtx.state === "suspended" || this.audioCtx.state === "interrupted" ) {
+        const resume = this.audioCtx.resume();
+        const timeout = new Promise((_r, rej) => setTimeout(() => rej("p2"), 1000));
+        try {
+          await Promise.race([resume, timeout]);
+        } catch(e) {
+          console.log("Can't play audio. Web Audio API suspended.");
+          this.playAudio(true);
+          return;
+        }
+      }
+      // Store current audio item and start time
+      this.currentAudioItem = {
+        audio: item.audio,
+        anim: item.anim ? JSON.parse(JSON.stringify(item.anim)) : null,
+        text: item.text,
+        delay: item.delay
+      };
+      // Create audio source
+      this.audioSpeechSource = this.audioCtx.createBufferSource();
+      this.audioSpeechSource.buffer = item.audio;
+      this.audioSpeechSource.playbackRate.value = 1 / this.animSlowdownRate;
+      this.audioSpeechSource.connect(this.audioAnalyzerNode);
+      const startDelay = item.delay / 1000;
+      this.audioStartTime = this.audioCtx.currentTime + startDelay;
+      this.audioSpeechSource.addEventListener('ended', () => {
+        this.audioSpeechSource.disconnect();
+        this.audioStartTime = null;
+        this.currentAudioItem = null;
+        this.playAudio(true);
+      }, { once: true });
+      // Push trimmed animation data to queue
+      if ( item.anim && item.anim.length > 0 ) {
+        item.anim.forEach( animGroup => {
+          if (animGroup && animGroup.ts && animGroup.ts.length > 0) {
+            const animData = {
+              template: animGroup.template,
+              ts: animGroup.ts.map(ts => this.animClock + ts),
+              vs: animGroup.vs
+            };
+            this.animQueue.push(animData);
+          }
+        });
+      }
+      // Play immediately (no delay for resumed audio)
+      this.audioSpeechSource.start(startDelay);
+      return;
+    }
     if ( this.audioPlaylist.length ) {
       const item = this.audioPlaylist.shift();
@@ -3748,23 +3821,43 @@ class TalkingHead {
         audio = item.audio;
       }
+      // Store current audio item and start time for pause/resume tracking
+      this.currentAudioItem = {
+        audio: audio,
+        anim: item.anim ? JSON.parse(JSON.stringify(item.anim)) : null, // Deep copy
+        text: item.text,
+        delay: 0
+      };
+      // Calculate delay for pre-animations
+      let delay = 0;
+      if ( item.anim ) {
+        // Find the lowest negative time point, if any
+        if ( !item.isRaw ) {
+          delay = Math.abs(Math.min(0, ...item.anim.map( x => Math.min(...x.ts) ) ) );
+        }
+        this.currentAudioItem.delay = delay;
+      }
       // Create audio source
       this.audioSpeechSource = this.audioCtx.createBufferSource();
       this.audioSpeechSource.buffer = audio;
       this.audioSpeechSource.playbackRate.value = 1 / this.animSlowdownRate;
       this.audioSpeechSource.connect(this.audioAnalyzerNode);
+      // Track when audio starts playing (accounting for delay)
+      const startDelay = delay / 1000;
+      this.audioStartTime = this.audioCtx.currentTime + startDelay;
       this.audioSpeechSource.addEventListener('ended', () => {
         this.audioSpeechSource.disconnect();
+        this.audioStartTime = null;
+        this.currentAudioItem = null;
         this.playAudio(true);
       }, { once: true });
       // Rescale lipsync and push to queue
-      let delay = 0;
       if ( item.anim ) {
-        // Find the lowest negative time point, if any
-        if ( !item.isRaw ) {
-          delay = Math.abs(Math.min(0, ...item.anim.map( x => Math.min(...x.ts) ) ) );
-        }
         item.anim.forEach( x => {
           for(let i=0; i<x.ts.length; i++) {
             x.ts[i] = this.animClock + x.ts[i] + delay;
@@ -3773,8 +3866,8 @@ class TalkingHead {
         });
       }
-      // Play, dealy in seconds so pre-animations can be played
-      this.audioSpeechSource.start(delay/1000);
+      // Play, delay in seconds so pre-animations can be played
+      this.audioSpeechSource.start(startDelay);
     } else {
       this.isAudioPlaying = false;
@@ -4433,18 +4526,109 @@ class TalkingHead {
   /**
   * Pause speaking.
+  * Returns paused audio data with trimmed buffer if audio was playing.
   */
   pauseSpeaking() {
-    try { this.audioSpeechSource.stop(); } catch(error) {}
+    let pausedData = null;
+    // If audio is currently playing, calculate elapsed time and trim buffer
+    if (this.audioSpeechSource && this.currentAudioItem && this.audioStartTime !== null) {
+      try {
+        const currentTime = this.audioCtx.currentTime;
+        const elapsedTime = Math.max(0, currentTime - this.audioStartTime);
+        const playbackRate = this.audioSpeechSource.playbackRate.value;
+        const elapsedInBuffer = elapsedTime * playbackRate;
+        const originalBuffer = this.currentAudioItem.audio;
+        const sampleRate = originalBuffer.sampleRate;
+        const startSample = Math.floor(elapsedInBuffer * sampleRate);
+        // Only trim if we haven't played the entire buffer
+        if (startSample < originalBuffer.length) {
+          // Create trimmed buffer
+          const trimmedLength = originalBuffer.length - startSample;
+          const trimmedBuffer = this.audioCtx.createBuffer(
+            originalBuffer.numberOfChannels,
+            trimmedLength,
+            sampleRate
+          );
+          // Copy remaining samples
+          for (let channel = 0; channel < originalBuffer.numberOfChannels; channel++) {
+            const originalData = originalBuffer.getChannelData(channel);
+            const trimmedData = trimmedBuffer.getChannelData(channel);
+            for (let i = 0; i < trimmedLength; i++) {
+              trimmedData[i] = originalData[startSample + i];
+            }
+          }
+          // Trim animation data (lip-sync) - adjust timestamps
+          let trimmedAnim = null;
+          if (this.currentAudioItem.anim) {
+            // Calculate the absolute time when this audio started (for comparison)
+            const audioStartAnimTime = this.animClock + this.currentAudioItem.delay;
+            const elapsedAnimTime = elapsedTime * 1000; // Convert to ms
+            const currentAnimTime = audioStartAnimTime + elapsedAnimTime;
+            trimmedAnim = this.currentAudioItem.anim.map(animGroup => {
+              const trimmed = {
+                template: animGroup.template,
+                ts: [],
+                vs: []
+              };
+              // Find animations that haven't started yet
+              // animGroup.ts contains absolute timestamps (already adjusted to animClock)
+              for (let i = 0; i < animGroup.ts.length; i++) {
+                const animTimestamp = animGroup.ts[i];
+                // If animation timestamp is in the future (hasn't happened yet)
+                if (animTimestamp > currentAnimTime) {
+                  // Adjust to relative time from resume point (start from 0)
+                  const relativeTime = animTimestamp - currentAnimTime;
+                  trimmed.ts.push(relativeTime);
+                  trimmed.vs.push(animGroup.vs[i]);
+                }
+              }
+              return trimmed.ts.length > 0 ? trimmed : null;
+            }).filter(x => x !== null);
+          }
+          pausedData = {
+            audio: trimmedBuffer,
+            anim: trimmedAnim,
+            text: this.currentAudioItem.text,
+            delay: 0, // No delay needed for trimmed buffer
+            elapsedTime: elapsedTime
+          };
+        }
+        this.audioSpeechSource.stop();
+      } catch(error) {
+        console.warn('Error trimming audio buffer on pause:', error);
+      }
+    } else {
+      // No audio playing, just stop if source exists
+      try { this.audioSpeechSource?.stop(); } catch(error) {}
+    }
     this.audioPlaylist.length = 0;
     this.stateName = 'idle';
     this.isSpeaking = false;
     this.isAudioPlaying = false;
+    this.audioStartTime = null;
+    this.currentAudioItem = null;
+    // Clear viseme animations but keep others
     this.animQueue = this.animQueue.filter( x  => x.template.name !== 'viseme' && x.template.name !== 'subtitles' && x.template.name !== 'blendshapes' );
     if ( this.armature ) {
       this.resetLips();
       this.render();
     }
+    return pausedData;
   }
   /**

package/src/utils/animationLoader.js CHANGED Viewed

@@ -13,13 +13,35 @@ export async function loadAnimationsFromManifest(manifestPath) {
   try {
     const response = await fetch(manifestPath);
     if (!response.ok) {
+      // Don't log error for 404 - manifest is optional
+      if (response.status === 404) {
+        return {};
+      }
       throw new Error(`Failed to fetch manifest: ${response.status} ${response.statusText}`);
     }
-    const manifest = await response.json();
+    // Check if response is actually JSON (not HTML error page)
+    const contentType = response.headers.get('content-type');
+    if (contentType && !contentType.includes('application/json')) {
+      // Response is not JSON (probably HTML error page)
+      return {};
+    }
+    const text = await response.text();
+    // Try to parse JSON, but check if it looks like HTML first
+    if (text.trim().startsWith('<!')) {
+      // This is HTML, not JSON (likely a 404 page)
+      return {};
+    }
+    const manifest = JSON.parse(text);
     const animations = manifest.animations || {};
     return animations;
   } catch (error) {
-    console.error('Failed to load animation manifest:', error);
+    // Only log if it's not a JSON parse error (which we handle above)
+    if (!(error instanceof SyntaxError)) {
+      console.warn('⚠️ Could not load animation manifest (this is optional):', manifestPath);
+    }
     return {};
   }
 }