@sage-rsc/talking-head-react 1.7.6 → 1.7.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +6 -6
- package/dist/index.js +2158 -2038
- package/package.json +1 -1
- package/src/components/SimpleTalkingAvatar.jsx +95 -49
- package/src/lib/talkinghead.mjs +193 -9
- package/src/utils/animationLoader.js +24 -2
package/package.json
CHANGED
(+1 -1; hunk not shown)
package/src/components/SimpleTalkingAvatar.jsx
CHANGED
|
@@ -81,6 +81,8 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
81
81
|
const currentAnimationGroupRef = useRef(null);
|
|
82
82
|
const playedAnimationsRef = useRef([]); // Track animations played during current speech
|
|
83
83
|
const animationQueueRef = useRef([]); // Queue of animations to play in order
|
|
84
|
+
const currentSentenceIndexRef = useRef(0); // Track which sentence is currently playing
|
|
85
|
+
const pausedAudioDataRef = useRef(null); // Store trimmed audio buffer when paused
|
|
84
86
|
|
|
85
87
|
// Keep ref in sync with state
|
|
86
88
|
useEffect(() => {
|
|
@@ -118,11 +120,12 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
118
120
|
|
|
119
121
|
// Option 1: Load from manifest file only
|
|
120
122
|
if (animations.manifest && !animations.auto) {
|
|
121
|
-
|
|
122
|
-
|
|
123
|
+
const manifestAnimations = await loadAnimationsFromManifest(animations.manifest);
|
|
124
|
+
// loadAnimationsFromManifest returns {} on error, so check if we got anything
|
|
125
|
+
const hasAnimations = Object.keys(manifestAnimations).length > 0;
|
|
126
|
+
if (hasAnimations) {
|
|
123
127
|
setLoadedAnimations(manifestAnimations);
|
|
124
|
-
}
|
|
125
|
-
console.error('Failed to load animation manifest:', error);
|
|
128
|
+
} else {
|
|
126
129
|
setLoadedAnimations(animations);
|
|
127
130
|
}
|
|
128
131
|
}
|
|
@@ -155,7 +158,7 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
155
158
|
folderPaths[`shared_talking`] = `${basePath}/shared/talking`;
|
|
156
159
|
folderPaths[`shared_idle`] = `${basePath}/shared/idle`;
|
|
157
160
|
|
|
158
|
-
|
|
161
|
+
// Loading animations from folders
|
|
159
162
|
const discoveredAnimations = await autoLoadAnimationsFromFolders(folderPaths, avatarBody);
|
|
160
163
|
|
|
161
164
|
// Check if we found any animations
|
|
@@ -406,33 +409,30 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
406
409
|
if (loadedAnimations._genderSpecific) {
|
|
407
410
|
const genderKey = getGenderKey(avatarBody);
|
|
408
411
|
|
|
409
|
-
// Debug: Log gender detection
|
|
410
|
-
console.log(`🔍 Gender detection: avatarBody="${avatarBody}" -> genderKey="${genderKey}"`);
|
|
411
|
-
|
|
412
412
|
const genderGroups = loadedAnimations._genderSpecific[genderKey];
|
|
413
413
|
|
|
414
414
|
// Try gender-specific first
|
|
415
415
|
if (genderGroups && genderGroups[groupName]) {
|
|
416
416
|
group = genderGroups[groupName];
|
|
417
|
-
console.log(`✅ Found ${genderKey} animations for "${groupName}": ${Array.isArray(group) ? group.length : 1} animation(s)`);
|
|
418
417
|
}
|
|
419
418
|
// Fallback to shared gender-specific animations
|
|
420
419
|
else if (loadedAnimations._genderSpecific.shared && loadedAnimations._genderSpecific.shared[groupName]) {
|
|
421
420
|
group = loadedAnimations._genderSpecific.shared[groupName];
|
|
422
|
-
console.log(`✅ Found shared animations for "${groupName}": ${Array.isArray(group) ? group.length : 1} animation(s)`);
|
|
423
|
-
} else {
|
|
424
|
-
console.log(`⚠️ No ${genderKey} or shared animations found for "${groupName}"`);
|
|
425
421
|
}
|
|
426
422
|
}
|
|
427
423
|
|
|
428
424
|
// Fallback to root-level animations if gender-specific not found
|
|
429
425
|
if (!group && loadedAnimations[groupName]) {
|
|
430
426
|
group = loadedAnimations[groupName];
|
|
431
|
-
console.log(`✅ Found root-level animations for "${groupName}": ${Array.isArray(group) ? group.length : 1} animation(s)`);
|
|
432
427
|
}
|
|
433
428
|
|
|
434
429
|
if (!group) {
|
|
435
|
-
|
|
430
|
+
// Only log warning if animations were actually configured (not just empty object)
|
|
431
|
+
const hasAnyAnimations = Object.keys(loadedAnimations).length > 0 ||
|
|
432
|
+
(loadedAnimations._genderSpecific && Object.keys(loadedAnimations._genderSpecific).length > 0);
|
|
433
|
+
if (hasAnyAnimations) {
|
|
434
|
+
console.warn(`⚠️ No animations found for group "${groupName}". Make sure animations are configured correctly.`);
|
|
435
|
+
}
|
|
436
436
|
return [];
|
|
437
437
|
}
|
|
438
438
|
|
|
@@ -614,6 +614,7 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
614
614
|
// Split text into sentences for tracking
|
|
615
615
|
const sentences = textToSpeak.split(/[.!?]+/).filter(s => s.trim().length > 0);
|
|
616
616
|
originalSentencesRef.current = sentences;
|
|
617
|
+
currentSentenceIndexRef.current = 0; // Reset sentence tracking
|
|
617
618
|
|
|
618
619
|
const speakOptions = {
|
|
619
620
|
lipsyncLang: options.lipsyncLang || 'en',
|
|
@@ -693,8 +694,8 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
693
694
|
try {
|
|
694
695
|
// Check if currently speaking
|
|
695
696
|
const isSpeaking = talkingHeadRef.current.isSpeaking || false;
|
|
696
|
-
const audioPlaylist = talkingHeadRef.current.audioPlaylist || [];
|
|
697
|
-
const speechQueue = talkingHeadRef.current.speechQueue || [];
|
|
697
|
+
const audioPlaylist = [...(talkingHeadRef.current.audioPlaylist || [])]; // Copy before pausing
|
|
698
|
+
const speechQueue = [...(talkingHeadRef.current.speechQueue || [])]; // Copy before clearing
|
|
698
699
|
|
|
699
700
|
if (isSpeaking || audioPlaylist.length > 0 || speechQueue.length > 0) {
|
|
700
701
|
// Clear speech end interval
|
|
@@ -703,51 +704,84 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
703
704
|
speechEndIntervalRef.current = null;
|
|
704
705
|
}
|
|
705
706
|
|
|
706
|
-
// Extract
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
707
|
+
// IMPORTANT: Extract text BEFORE calling pauseSpeaking(), which clears audioPlaylist
|
|
708
|
+
// Track which sentences need to be re-spoken
|
|
709
|
+
const sentences = originalSentencesRef.current;
|
|
710
|
+
let remainingSentences = [];
|
|
711
|
+
|
|
712
|
+
// Check if audio is currently playing (item already shifted from playlist)
|
|
713
|
+
const isAudioCurrentlyPlaying = talkingHeadRef.current.isAudioPlaying || false;
|
|
714
|
+
|
|
715
|
+
if (sentences.length > 0) {
|
|
716
|
+
// Calculate which sentence is currently playing
|
|
717
|
+
// Total sentences = sentences.length
|
|
718
|
+
// Queued in audioPlaylist = audioPlaylist.length (already processed by TTS, waiting to play)
|
|
719
|
+
// Queued in speechQueue = speechQueue.length (not yet sent to TTS)
|
|
720
|
+
// Currently playing = 1 (if isAudioPlaying is true)
|
|
721
|
+
|
|
722
|
+
const queuedCount = audioPlaylist.length + speechQueue.length;
|
|
723
|
+
const currentlyPlayingCount = isAudioCurrentlyPlaying ? 1 : 0;
|
|
724
|
+
const processedCount = sentences.length - queuedCount - currentlyPlayingCount;
|
|
725
|
+
|
|
726
|
+
// If audio is currently playing, we're mid-sentence - restart from current sentence
|
|
727
|
+
// Otherwise, continue from next sentence
|
|
728
|
+
const startIndex = isAudioCurrentlyPlaying ? processedCount : processedCount + currentlyPlayingCount;
|
|
729
|
+
|
|
730
|
+
if (startIndex < sentences.length) {
|
|
731
|
+
remainingSentences = sentences.slice(startIndex);
|
|
732
|
+
}
|
|
733
|
+
} else {
|
|
734
|
+
// Fallback: Extract text from queues if we don't have original sentences
|
|
735
|
+
// Extract text from audio playlist (queued audio, not yet playing)
|
|
736
|
+
if (audioPlaylist.length > 0) {
|
|
737
|
+
audioPlaylist.forEach(item => {
|
|
724
738
|
if (item.text) {
|
|
725
739
|
if (Array.isArray(item.text)) {
|
|
726
|
-
|
|
740
|
+
const sentenceText = item.text.map(wordObj => wordObj.word).join(' ');
|
|
741
|
+
if (sentenceText.trim()) {
|
|
742
|
+
remainingSentences.push(sentenceText);
|
|
743
|
+
}
|
|
744
|
+
} else if (item.text.trim()) {
|
|
745
|
+
remainingSentences.push(item.text);
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
});
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
// Extract remaining text from speech queue (not yet sent to TTS)
|
|
752
|
+
if (speechQueue.length > 0) {
|
|
753
|
+
speechQueue.forEach(item => {
|
|
754
|
+
if (item.text) {
|
|
755
|
+
if (Array.isArray(item.text)) {
|
|
756
|
+
const sentenceText = item.text.map(wordObj => wordObj.word).join(' ');
|
|
757
|
+
if (sentenceText.trim()) {
|
|
758
|
+
remainingSentences.push(sentenceText);
|
|
759
|
+
}
|
|
760
|
+
} else if (item.text.trim()) {
|
|
761
|
+
remainingSentences.push(item.text);
|
|
727
762
|
}
|
|
728
|
-
return item.text;
|
|
729
763
|
}
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
.filter(text => text.trim().length > 0)
|
|
733
|
-
.join(' ');
|
|
764
|
+
});
|
|
765
|
+
}
|
|
734
766
|
}
|
|
735
|
-
|
|
736
|
-
// Combine
|
|
737
|
-
const combinedRemainingText =
|
|
738
|
-
? (audioPlaylistText + (remainingText ? ' ' + remainingText : ''))
|
|
739
|
-
: remainingText;
|
|
767
|
+
|
|
768
|
+
// Combine remaining sentences
|
|
769
|
+
const combinedRemainingText = remainingSentences.join(' ');
|
|
740
770
|
|
|
741
771
|
// Store progress for resume
|
|
742
772
|
speechProgressRef.current = {
|
|
743
773
|
remainingText: combinedRemainingText || null,
|
|
744
774
|
originalText: pausedSpeechRef.current?.text || null,
|
|
745
|
-
options: pausedSpeechRef.current?.options || null
|
|
775
|
+
options: pausedSpeechRef.current?.options || null,
|
|
776
|
+
// Track if we're pausing mid-sentence (has currently playing audio)
|
|
777
|
+
isMidSentence: audioPlaylist.length > 0
|
|
746
778
|
};
|
|
747
779
|
|
|
748
|
-
// Clear speech queue and pause
|
|
780
|
+
// Clear speech queue and pause (this will stop audio and clear audioPlaylist)
|
|
781
|
+
// pauseSpeaking() now returns trimmed audio data if audio was playing
|
|
749
782
|
talkingHeadRef.current.speechQueue.length = 0;
|
|
750
|
-
talkingHeadRef.current.pauseSpeaking();
|
|
783
|
+
const pausedAudioData = talkingHeadRef.current.pauseSpeaking();
|
|
784
|
+
pausedAudioDataRef.current = pausedAudioData; // Store trimmed buffer for exact resume
|
|
751
785
|
setIsPaused(true);
|
|
752
786
|
isPausedRef.current = true;
|
|
753
787
|
}
|
|
@@ -767,7 +801,15 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
767
801
|
setIsPaused(false);
|
|
768
802
|
isPausedRef.current = false;
|
|
769
803
|
|
|
770
|
-
//
|
|
804
|
+
// If we have trimmed audio data from pause, resume from exact position
|
|
805
|
+
if (pausedAudioDataRef.current && pausedAudioDataRef.current.audio) {
|
|
806
|
+
// Resume with trimmed buffer (exact position)
|
|
807
|
+
await talkingHeadRef.current.playAudio(false, pausedAudioDataRef.current);
|
|
808
|
+
pausedAudioDataRef.current = null; // Clear after use
|
|
809
|
+
return;
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
// Otherwise, resume from remaining text (fallback)
|
|
771
813
|
const remainingText = speechProgressRef.current?.remainingText;
|
|
772
814
|
const originalText = speechProgressRef.current?.originalText || pausedSpeechRef.current?.text;
|
|
773
815
|
const originalOptions = speechProgressRef.current?.options || pausedSpeechRef.current?.options || {};
|
|
@@ -777,10 +819,14 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
777
819
|
if (textToSpeak) {
|
|
778
820
|
speakText(textToSpeak, originalOptions);
|
|
779
821
|
}
|
|
822
|
+
|
|
823
|
+
// Clear paused audio data
|
|
824
|
+
pausedAudioDataRef.current = null;
|
|
780
825
|
} catch (err) {
|
|
781
826
|
console.warn('Error resuming speech:', err);
|
|
782
827
|
setIsPaused(false);
|
|
783
828
|
isPausedRef.current = false;
|
|
829
|
+
pausedAudioDataRef.current = null;
|
|
784
830
|
}
|
|
785
831
|
}, [isPaused, speakText, resumeAudioContext]);
|
|
786
832
|
|
package/src/lib/talkinghead.mjs
CHANGED
|
@@ -835,6 +835,11 @@ class TalkingHead {
|
|
|
835
835
|
this.speechQueue = [];
|
|
836
836
|
this.isSpeaking = false;
|
|
837
837
|
this.isListening = false;
|
|
838
|
+
|
|
839
|
+
// Pause/resume tracking for buffer trimming
|
|
840
|
+
this.audioStartTime = null; // When current audio started playing
|
|
841
|
+
this.currentAudioItem = null; // Current audio item being played
|
|
842
|
+
this.pausedAudioData = null; // Stored trimmed buffer when paused
|
|
838
843
|
|
|
839
844
|
// Setup Google text-to-speech
|
|
840
845
|
if ( this.opt.ttsEndpoint ) {
|
|
@@ -3718,10 +3723,78 @@ class TalkingHead {
|
|
|
3718
3723
|
/**
|
|
3719
3724
|
* Play audio playlist using Web Audio API.
|
|
3720
3725
|
* @param {boolean} [force=false] If true, forces to proceed
|
|
3726
|
+
* @param {Object} [pausedAudioData=null] Trimmed audio data from pause to resume from exact position
|
|
3721
3727
|
*/
|
|
3722
|
-
async playAudio(force=false) {
|
|
3728
|
+
async playAudio(force=false, pausedAudioData=null) {
|
|
3723
3729
|
if ( !this.armature || (this.isAudioPlaying && !force) ) return;
|
|
3724
3730
|
this.isAudioPlaying = true;
|
|
3731
|
+
|
|
3732
|
+
// If we have paused audio data, play that first (resume from exact position)
|
|
3733
|
+
if (pausedAudioData && pausedAudioData.audio) {
|
|
3734
|
+
const item = {
|
|
3735
|
+
audio: pausedAudioData.audio,
|
|
3736
|
+
anim: pausedAudioData.anim,
|
|
3737
|
+
text: pausedAudioData.text,
|
|
3738
|
+
delay: pausedAudioData.delay || 0,
|
|
3739
|
+
isRaw: false
|
|
3740
|
+
};
|
|
3741
|
+
|
|
3742
|
+
// If Web Audio API is suspended, try to resume it
|
|
3743
|
+
if ( this.audioCtx.state === "suspended" || this.audioCtx.state === "interrupted" ) {
|
|
3744
|
+
const resume = this.audioCtx.resume();
|
|
3745
|
+
const timeout = new Promise((_r, rej) => setTimeout(() => rej("p2"), 1000));
|
|
3746
|
+
try {
|
|
3747
|
+
await Promise.race([resume, timeout]);
|
|
3748
|
+
} catch(e) {
|
|
3749
|
+
console.log("Can't play audio. Web Audio API suspended.");
|
|
3750
|
+
this.playAudio(true);
|
|
3751
|
+
return;
|
|
3752
|
+
}
|
|
3753
|
+
}
|
|
3754
|
+
|
|
3755
|
+
// Store current audio item and start time
|
|
3756
|
+
this.currentAudioItem = {
|
|
3757
|
+
audio: item.audio,
|
|
3758
|
+
anim: item.anim ? JSON.parse(JSON.stringify(item.anim)) : null,
|
|
3759
|
+
text: item.text,
|
|
3760
|
+
delay: item.delay
|
|
3761
|
+
};
|
|
3762
|
+
|
|
3763
|
+
// Create audio source
|
|
3764
|
+
this.audioSpeechSource = this.audioCtx.createBufferSource();
|
|
3765
|
+
this.audioSpeechSource.buffer = item.audio;
|
|
3766
|
+
this.audioSpeechSource.playbackRate.value = 1 / this.animSlowdownRate;
|
|
3767
|
+
this.audioSpeechSource.connect(this.audioAnalyzerNode);
|
|
3768
|
+
|
|
3769
|
+
const startDelay = item.delay / 1000;
|
|
3770
|
+
this.audioStartTime = this.audioCtx.currentTime + startDelay;
|
|
3771
|
+
|
|
3772
|
+
this.audioSpeechSource.addEventListener('ended', () => {
|
|
3773
|
+
this.audioSpeechSource.disconnect();
|
|
3774
|
+
this.audioStartTime = null;
|
|
3775
|
+
this.currentAudioItem = null;
|
|
3776
|
+
this.playAudio(true);
|
|
3777
|
+
}, { once: true });
|
|
3778
|
+
|
|
3779
|
+
// Push trimmed animation data to queue
|
|
3780
|
+
if ( item.anim && item.anim.length > 0 ) {
|
|
3781
|
+
item.anim.forEach( animGroup => {
|
|
3782
|
+
if (animGroup && animGroup.ts && animGroup.ts.length > 0) {
|
|
3783
|
+
const animData = {
|
|
3784
|
+
template: animGroup.template,
|
|
3785
|
+
ts: animGroup.ts.map(ts => this.animClock + ts),
|
|
3786
|
+
vs: animGroup.vs
|
|
3787
|
+
};
|
|
3788
|
+
this.animQueue.push(animData);
|
|
3789
|
+
}
|
|
3790
|
+
});
|
|
3791
|
+
}
|
|
3792
|
+
|
|
3793
|
+
// Play immediately (no delay for resumed audio)
|
|
3794
|
+
this.audioSpeechSource.start(startDelay);
|
|
3795
|
+
return;
|
|
3796
|
+
}
|
|
3797
|
+
|
|
3725
3798
|
if ( this.audioPlaylist.length ) {
|
|
3726
3799
|
const item = this.audioPlaylist.shift();
|
|
3727
3800
|
|
|
@@ -3748,23 +3821,43 @@ class TalkingHead {
|
|
|
3748
3821
|
audio = item.audio;
|
|
3749
3822
|
}
|
|
3750
3823
|
|
|
3824
|
+
// Store current audio item and start time for pause/resume tracking
|
|
3825
|
+
this.currentAudioItem = {
|
|
3826
|
+
audio: audio,
|
|
3827
|
+
anim: item.anim ? JSON.parse(JSON.stringify(item.anim)) : null, // Deep copy
|
|
3828
|
+
text: item.text,
|
|
3829
|
+
delay: 0
|
|
3830
|
+
};
|
|
3831
|
+
|
|
3832
|
+
// Calculate delay for pre-animations
|
|
3833
|
+
let delay = 0;
|
|
3834
|
+
if ( item.anim ) {
|
|
3835
|
+
// Find the lowest negative time point, if any
|
|
3836
|
+
if ( !item.isRaw ) {
|
|
3837
|
+
delay = Math.abs(Math.min(0, ...item.anim.map( x => Math.min(...x.ts) ) ) );
|
|
3838
|
+
}
|
|
3839
|
+
this.currentAudioItem.delay = delay;
|
|
3840
|
+
}
|
|
3841
|
+
|
|
3751
3842
|
// Create audio source
|
|
3752
3843
|
this.audioSpeechSource = this.audioCtx.createBufferSource();
|
|
3753
3844
|
this.audioSpeechSource.buffer = audio;
|
|
3754
3845
|
this.audioSpeechSource.playbackRate.value = 1 / this.animSlowdownRate;
|
|
3755
3846
|
this.audioSpeechSource.connect(this.audioAnalyzerNode);
|
|
3847
|
+
|
|
3848
|
+
// Track when audio starts playing (accounting for delay)
|
|
3849
|
+
const startDelay = delay / 1000;
|
|
3850
|
+
this.audioStartTime = this.audioCtx.currentTime + startDelay;
|
|
3851
|
+
|
|
3756
3852
|
this.audioSpeechSource.addEventListener('ended', () => {
|
|
3757
3853
|
this.audioSpeechSource.disconnect();
|
|
3854
|
+
this.audioStartTime = null;
|
|
3855
|
+
this.currentAudioItem = null;
|
|
3758
3856
|
this.playAudio(true);
|
|
3759
3857
|
}, { once: true });
|
|
3760
3858
|
|
|
3761
3859
|
// Rescale lipsync and push to queue
|
|
3762
|
-
let delay = 0;
|
|
3763
3860
|
if ( item.anim ) {
|
|
3764
|
-
// Find the lowest negative time point, if any
|
|
3765
|
-
if ( !item.isRaw ) {
|
|
3766
|
-
delay = Math.abs(Math.min(0, ...item.anim.map( x => Math.min(...x.ts) ) ) );
|
|
3767
|
-
}
|
|
3768
3861
|
item.anim.forEach( x => {
|
|
3769
3862
|
for(let i=0; i<x.ts.length; i++) {
|
|
3770
3863
|
x.ts[i] = this.animClock + x.ts[i] + delay;
|
|
@@ -3773,8 +3866,8 @@ class TalkingHead {
|
|
|
3773
3866
|
});
|
|
3774
3867
|
}
|
|
3775
3868
|
|
|
3776
|
-
// Play,
|
|
3777
|
-
this.audioSpeechSource.start(
|
|
3869
|
+
// Play, delay in seconds so pre-animations can be played
|
|
3870
|
+
this.audioSpeechSource.start(startDelay);
|
|
3778
3871
|
|
|
3779
3872
|
} else {
|
|
3780
3873
|
this.isAudioPlaying = false;
|
|
@@ -4433,18 +4526,109 @@ class TalkingHead {
|
|
|
4433
4526
|
|
|
4434
4527
|
/**
|
|
4435
4528
|
* Pause speaking.
|
|
4529
|
+
* Returns paused audio data with trimmed buffer if audio was playing.
|
|
4436
4530
|
*/
|
|
4437
4531
|
pauseSpeaking() {
|
|
4438
|
-
|
|
4532
|
+
let pausedData = null;
|
|
4533
|
+
|
|
4534
|
+
// If audio is currently playing, calculate elapsed time and trim buffer
|
|
4535
|
+
if (this.audioSpeechSource && this.currentAudioItem && this.audioStartTime !== null) {
|
|
4536
|
+
try {
|
|
4537
|
+
const currentTime = this.audioCtx.currentTime;
|
|
4538
|
+
const elapsedTime = Math.max(0, currentTime - this.audioStartTime);
|
|
4539
|
+
const playbackRate = this.audioSpeechSource.playbackRate.value;
|
|
4540
|
+
const elapsedInBuffer = elapsedTime * playbackRate;
|
|
4541
|
+
|
|
4542
|
+
const originalBuffer = this.currentAudioItem.audio;
|
|
4543
|
+
const sampleRate = originalBuffer.sampleRate;
|
|
4544
|
+
const startSample = Math.floor(elapsedInBuffer * sampleRate);
|
|
4545
|
+
|
|
4546
|
+
// Only trim if we haven't played the entire buffer
|
|
4547
|
+
if (startSample < originalBuffer.length) {
|
|
4548
|
+
// Create trimmed buffer
|
|
4549
|
+
const trimmedLength = originalBuffer.length - startSample;
|
|
4550
|
+
const trimmedBuffer = this.audioCtx.createBuffer(
|
|
4551
|
+
originalBuffer.numberOfChannels,
|
|
4552
|
+
trimmedLength,
|
|
4553
|
+
sampleRate
|
|
4554
|
+
);
|
|
4555
|
+
|
|
4556
|
+
// Copy remaining samples
|
|
4557
|
+
for (let channel = 0; channel < originalBuffer.numberOfChannels; channel++) {
|
|
4558
|
+
const originalData = originalBuffer.getChannelData(channel);
|
|
4559
|
+
const trimmedData = trimmedBuffer.getChannelData(channel);
|
|
4560
|
+
for (let i = 0; i < trimmedLength; i++) {
|
|
4561
|
+
trimmedData[i] = originalData[startSample + i];
|
|
4562
|
+
}
|
|
4563
|
+
}
|
|
4564
|
+
|
|
4565
|
+
// Trim animation data (lip-sync) - adjust timestamps
|
|
4566
|
+
let trimmedAnim = null;
|
|
4567
|
+
if (this.currentAudioItem.anim) {
|
|
4568
|
+
// Calculate the absolute time when this audio started (for comparison)
|
|
4569
|
+
const audioStartAnimTime = this.animClock + this.currentAudioItem.delay;
|
|
4570
|
+
const elapsedAnimTime = elapsedTime * 1000; // Convert to ms
|
|
4571
|
+
const currentAnimTime = audioStartAnimTime + elapsedAnimTime;
|
|
4572
|
+
|
|
4573
|
+
trimmedAnim = this.currentAudioItem.anim.map(animGroup => {
|
|
4574
|
+
const trimmed = {
|
|
4575
|
+
template: animGroup.template,
|
|
4576
|
+
ts: [],
|
|
4577
|
+
vs: []
|
|
4578
|
+
};
|
|
4579
|
+
|
|
4580
|
+
// Find animations that haven't started yet
|
|
4581
|
+
// animGroup.ts contains absolute timestamps (already adjusted to animClock)
|
|
4582
|
+
for (let i = 0; i < animGroup.ts.length; i++) {
|
|
4583
|
+
const animTimestamp = animGroup.ts[i];
|
|
4584
|
+
|
|
4585
|
+
// If animation timestamp is in the future (hasn't happened yet)
|
|
4586
|
+
if (animTimestamp > currentAnimTime) {
|
|
4587
|
+
// Adjust to relative time from resume point (start from 0)
|
|
4588
|
+
const relativeTime = animTimestamp - currentAnimTime;
|
|
4589
|
+
trimmed.ts.push(relativeTime);
|
|
4590
|
+
trimmed.vs.push(animGroup.vs[i]);
|
|
4591
|
+
}
|
|
4592
|
+
}
|
|
4593
|
+
|
|
4594
|
+
return trimmed.ts.length > 0 ? trimmed : null;
|
|
4595
|
+
}).filter(x => x !== null);
|
|
4596
|
+
}
|
|
4597
|
+
|
|
4598
|
+
pausedData = {
|
|
4599
|
+
audio: trimmedBuffer,
|
|
4600
|
+
anim: trimmedAnim,
|
|
4601
|
+
text: this.currentAudioItem.text,
|
|
4602
|
+
delay: 0, // No delay needed for trimmed buffer
|
|
4603
|
+
elapsedTime: elapsedTime
|
|
4604
|
+
};
|
|
4605
|
+
}
|
|
4606
|
+
|
|
4607
|
+
this.audioSpeechSource.stop();
|
|
4608
|
+
} catch(error) {
|
|
4609
|
+
console.warn('Error trimming audio buffer on pause:', error);
|
|
4610
|
+
}
|
|
4611
|
+
} else {
|
|
4612
|
+
// No audio playing, just stop if source exists
|
|
4613
|
+
try { this.audioSpeechSource?.stop(); } catch(error) {}
|
|
4614
|
+
}
|
|
4615
|
+
|
|
4439
4616
|
this.audioPlaylist.length = 0;
|
|
4440
4617
|
this.stateName = 'idle';
|
|
4441
4618
|
this.isSpeaking = false;
|
|
4442
4619
|
this.isAudioPlaying = false;
|
|
4620
|
+
this.audioStartTime = null;
|
|
4621
|
+
this.currentAudioItem = null;
|
|
4622
|
+
|
|
4623
|
+
// Clear viseme animations but keep others
|
|
4443
4624
|
this.animQueue = this.animQueue.filter( x => x.template.name !== 'viseme' && x.template.name !== 'subtitles' && x.template.name !== 'blendshapes' );
|
|
4625
|
+
|
|
4444
4626
|
if ( this.armature ) {
|
|
4445
4627
|
this.resetLips();
|
|
4446
4628
|
this.render();
|
|
4447
4629
|
}
|
|
4630
|
+
|
|
4631
|
+
return pausedData;
|
|
4448
4632
|
}
|
|
4449
4633
|
|
|
4450
4634
|
/**
|
|
package/src/utils/animationLoader.js
CHANGED
|
@@ -13,13 +13,35 @@ export async function loadAnimationsFromManifest(manifestPath) {
|
|
|
13
13
|
try {
|
|
14
14
|
const response = await fetch(manifestPath);
|
|
15
15
|
if (!response.ok) {
|
|
16
|
+
// Don't log error for 404 - manifest is optional
|
|
17
|
+
if (response.status === 404) {
|
|
18
|
+
return {};
|
|
19
|
+
}
|
|
16
20
|
throw new Error(`Failed to fetch manifest: ${response.status} ${response.statusText}`);
|
|
17
21
|
}
|
|
18
|
-
|
|
22
|
+
|
|
23
|
+
// Check if response is actually JSON (not HTML error page)
|
|
24
|
+
const contentType = response.headers.get('content-type');
|
|
25
|
+
if (contentType && !contentType.includes('application/json')) {
|
|
26
|
+
// Response is not JSON (probably HTML error page)
|
|
27
|
+
return {};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
const text = await response.text();
|
|
31
|
+
// Try to parse JSON, but check if it looks like HTML first
|
|
32
|
+
if (text.trim().startsWith('<!')) {
|
|
33
|
+
// This is HTML, not JSON (likely a 404 page)
|
|
34
|
+
return {};
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
const manifest = JSON.parse(text);
|
|
19
38
|
const animations = manifest.animations || {};
|
|
20
39
|
return animations;
|
|
21
40
|
} catch (error) {
|
|
22
|
-
|
|
41
|
+
// Only log if it's not a JSON parse error (which we handle above)
|
|
42
|
+
if (!(error instanceof SyntaxError)) {
|
|
43
|
+
console.warn('⚠️ Could not load animation manifest (this is optional):', manifestPath);
|
|
44
|
+
}
|
|
23
45
|
return {};
|
|
24
46
|
}
|
|
25
47
|
}
|