@sage-rsc/talking-head-react 1.7.7 → 1.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +4 -4
- package/dist/index.js +2001 -1873
- package/package.json +1 -1
- package/src/components/SimpleTalkingAvatar.jsx +140 -37
- package/src/lib/talkinghead.mjs +197 -9
package/package.json
CHANGED
package/src/components/SimpleTalkingAvatar.jsx
CHANGED
|
@@ -81,6 +81,8 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
81
81
|
const currentAnimationGroupRef = useRef(null);
|
|
82
82
|
const playedAnimationsRef = useRef([]); // Track animations played during current speech
|
|
83
83
|
const animationQueueRef = useRef([]); // Queue of animations to play in order
|
|
84
|
+
const currentSentenceIndexRef = useRef(0); // Track which sentence is currently playing
|
|
85
|
+
const pausedAudioDataRef = useRef(null); // Store trimmed audio buffer when paused
|
|
84
86
|
|
|
85
87
|
// Keep ref in sync with state
|
|
86
88
|
useEffect(() => {
|
|
@@ -612,6 +614,7 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
612
614
|
// Split text into sentences for tracking
|
|
613
615
|
const sentences = textToSpeak.split(/[.!?]+/).filter(s => s.trim().length > 0);
|
|
614
616
|
originalSentencesRef.current = sentences;
|
|
617
|
+
currentSentenceIndexRef.current = 0; // Reset sentence tracking
|
|
615
618
|
|
|
616
619
|
const speakOptions = {
|
|
617
620
|
lipsyncLang: options.lipsyncLang || 'en',
|
|
@@ -691,8 +694,8 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
691
694
|
try {
|
|
692
695
|
// Check if currently speaking
|
|
693
696
|
const isSpeaking = talkingHeadRef.current.isSpeaking || false;
|
|
694
|
-
const audioPlaylist = talkingHeadRef.current.audioPlaylist || [];
|
|
695
|
-
const speechQueue = talkingHeadRef.current.speechQueue || [];
|
|
697
|
+
const audioPlaylist = [...(talkingHeadRef.current.audioPlaylist || [])]; // Copy before pausing
|
|
698
|
+
const speechQueue = [...(talkingHeadRef.current.speechQueue || [])]; // Copy before clearing
|
|
696
699
|
|
|
697
700
|
if (isSpeaking || audioPlaylist.length > 0 || speechQueue.length > 0) {
|
|
698
701
|
// Clear speech end interval
|
|
@@ -701,51 +704,100 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
701
704
|
speechEndIntervalRef.current = null;
|
|
702
705
|
}
|
|
703
706
|
|
|
704
|
-
// Extract
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
707
|
+
// IMPORTANT: Extract text BEFORE calling pauseSpeaking(), which clears audioPlaylist
|
|
708
|
+
// Track which sentences need to be re-spoken
|
|
709
|
+
const sentences = originalSentencesRef.current;
|
|
710
|
+
let remainingSentences = [];
|
|
711
|
+
|
|
712
|
+
// Check if audio is currently playing (item already shifted from playlist)
|
|
713
|
+
const isAudioCurrentlyPlaying = talkingHeadRef.current.isAudioPlaying || false;
|
|
714
|
+
|
|
715
|
+
if (sentences.length > 0) {
|
|
716
|
+
// Calculate which sentence is currently playing
|
|
717
|
+
// Total sentences = sentences.length
|
|
718
|
+
// Queued in audioPlaylist = audioPlaylist.length (already processed by TTS, waiting to play)
|
|
719
|
+
// Queued in speechQueue = speechQueue.length (not yet sent to TTS)
|
|
720
|
+
// Currently playing = 1 (if isAudioPlaying is true)
|
|
721
|
+
|
|
722
|
+
const queuedCount = audioPlaylist.length + speechQueue.length;
|
|
723
|
+
const currentlyPlayingCount = isAudioCurrentlyPlaying ? 1 : 0;
|
|
724
|
+
const processedCount = sentences.length - queuedCount - currentlyPlayingCount;
|
|
725
|
+
|
|
726
|
+
// If audio is currently playing, we're mid-sentence - restart from current sentence
|
|
727
|
+
// Otherwise, continue from next sentence
|
|
728
|
+
const startIndex = isAudioCurrentlyPlaying ? processedCount : processedCount + currentlyPlayingCount;
|
|
729
|
+
|
|
730
|
+
if (startIndex < sentences.length) {
|
|
731
|
+
remainingSentences = sentences.slice(startIndex);
|
|
732
|
+
}
|
|
733
|
+
} else {
|
|
734
|
+
// Fallback: Extract text from queues if we don't have original sentences
|
|
735
|
+
// Extract text from audio playlist (queued audio, not yet playing)
|
|
736
|
+
if (audioPlaylist.length > 0) {
|
|
737
|
+
audioPlaylist.forEach(item => {
|
|
722
738
|
if (item.text) {
|
|
723
739
|
if (Array.isArray(item.text)) {
|
|
724
|
-
|
|
740
|
+
const sentenceText = item.text.map(wordObj => wordObj.word).join(' ');
|
|
741
|
+
if (sentenceText.trim()) {
|
|
742
|
+
remainingSentences.push(sentenceText);
|
|
743
|
+
}
|
|
744
|
+
} else if (item.text.trim()) {
|
|
745
|
+
remainingSentences.push(item.text);
|
|
725
746
|
}
|
|
726
|
-
return item.text;
|
|
727
747
|
}
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
748
|
+
});
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
// Extract remaining text from speech queue (not yet sent to TTS)
|
|
752
|
+
if (speechQueue.length > 0) {
|
|
753
|
+
speechQueue.forEach(item => {
|
|
754
|
+
if (item.text) {
|
|
755
|
+
if (Array.isArray(item.text)) {
|
|
756
|
+
const sentenceText = item.text.map(wordObj => wordObj.word).join(' ');
|
|
757
|
+
if (sentenceText.trim()) {
|
|
758
|
+
remainingSentences.push(sentenceText);
|
|
759
|
+
}
|
|
760
|
+
} else if (item.text.trim()) {
|
|
761
|
+
remainingSentences.push(item.text);
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
});
|
|
765
|
+
}
|
|
732
766
|
}
|
|
733
|
-
|
|
734
|
-
// Combine
|
|
735
|
-
const combinedRemainingText =
|
|
736
|
-
? (audioPlaylistText + (remainingText ? ' ' + remainingText : ''))
|
|
737
|
-
: remainingText;
|
|
767
|
+
|
|
768
|
+
// Combine remaining sentences
|
|
769
|
+
const combinedRemainingText = remainingSentences.join(' ');
|
|
738
770
|
|
|
739
771
|
// Store progress for resume
|
|
740
772
|
speechProgressRef.current = {
|
|
741
773
|
remainingText: combinedRemainingText || null,
|
|
742
774
|
originalText: pausedSpeechRef.current?.text || null,
|
|
743
|
-
options: pausedSpeechRef.current?.options || null
|
|
775
|
+
options: pausedSpeechRef.current?.options || null,
|
|
776
|
+
// Track if we're pausing mid-sentence (has currently playing audio)
|
|
777
|
+
isMidSentence: audioPlaylist.length > 0
|
|
744
778
|
};
|
|
745
779
|
|
|
746
|
-
//
|
|
747
|
-
|
|
748
|
-
talkingHeadRef.current.
|
|
780
|
+
// IMPORTANT: Save speech queue BEFORE pausing if audio is playing
|
|
781
|
+
// We need to preserve it so remaining text continues after trimmed audio
|
|
782
|
+
const wasAudioPlaying = talkingHeadRef.current.isAudioPlaying || false;
|
|
783
|
+
const savedSpeechQueue = wasAudioPlaying ? [...(talkingHeadRef.current.speechQueue || [])] : null;
|
|
784
|
+
|
|
785
|
+
// Pause and get trimmed buffer if available
|
|
786
|
+
// pauseSpeaking() clears audioPlaylist but doesn't clear speechQueue
|
|
787
|
+
const pausedAudioData = talkingHeadRef.current.pauseSpeaking();
|
|
788
|
+
|
|
789
|
+
// If we have trimmed buffer, restore speech queue for continuation
|
|
790
|
+
// Otherwise, clear it (normal pause behavior)
|
|
791
|
+
if (pausedAudioData && pausedAudioData.audio && savedSpeechQueue) {
|
|
792
|
+
// Restore speech queue so remaining text continues after trimmed audio
|
|
793
|
+
talkingHeadRef.current.speechQueue.length = 0;
|
|
794
|
+
talkingHeadRef.current.speechQueue.push(...savedSpeechQueue);
|
|
795
|
+
} else {
|
|
796
|
+
// No trimmed buffer, clear speech queue normally
|
|
797
|
+
talkingHeadRef.current.speechQueue.length = 0;
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
pausedAudioDataRef.current = pausedAudioData; // Store trimmed buffer for exact resume
|
|
749
801
|
setIsPaused(true);
|
|
750
802
|
isPausedRef.current = true;
|
|
751
803
|
}
|
|
@@ -765,7 +817,54 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
765
817
|
setIsPaused(false);
|
|
766
818
|
isPausedRef.current = false;
|
|
767
819
|
|
|
768
|
-
//
|
|
820
|
+
// If we have trimmed audio data from pause, resume from exact position
|
|
821
|
+
if (pausedAudioDataRef.current && pausedAudioDataRef.current.audio) {
|
|
822
|
+
// Ensure speaking state is set for animations to continue
|
|
823
|
+
isSpeakingRef.current = true;
|
|
824
|
+
|
|
825
|
+
// Restore animation group if it was set
|
|
826
|
+
const originalOptions = speechProgressRef.current?.options || pausedSpeechRef.current?.options || {};
|
|
827
|
+
const animationGroup = originalOptions.animationGroup || autoAnimationGroup;
|
|
828
|
+
if (animationGroup) {
|
|
829
|
+
currentAnimationGroupRef.current = animationGroup;
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
// Ensure remaining text is in speech queue so it continues after trimmed buffer
|
|
833
|
+
const remainingText = speechProgressRef.current?.remainingText;
|
|
834
|
+
if (remainingText && talkingHeadRef.current.speechQueue) {
|
|
835
|
+
// Re-add remaining text to speech queue so it continues after trimmed audio
|
|
836
|
+
// Split into sentences and add to queue
|
|
837
|
+
const sentences = remainingText.split(/[.!?]+/).filter(s => s.trim().length > 0);
|
|
838
|
+
sentences.forEach(sentence => {
|
|
839
|
+
talkingHeadRef.current.speechQueue.push({
|
|
840
|
+
text: sentence.trim(),
|
|
841
|
+
options: originalOptions
|
|
842
|
+
});
|
|
843
|
+
});
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
// Mark as speaking so audio continues and speech queue processes
|
|
847
|
+
talkingHeadRef.current.isSpeaking = true;
|
|
848
|
+
|
|
849
|
+
// Resume with trimmed buffer (exact position)
|
|
850
|
+
// After trimmed buffer finishes, playAudio will call startSpeaking() which processes speechQueue
|
|
851
|
+
await talkingHeadRef.current.playAudio(false, pausedAudioDataRef.current);
|
|
852
|
+
|
|
853
|
+
// Continue animations if animation group is set
|
|
854
|
+
// Start immediately and they will continue as long as isSpeakingRef is true
|
|
855
|
+
if (animationGroup && !originalOptions.skipAnimation) {
|
|
856
|
+
// Reset animation queue for smooth continuation
|
|
857
|
+
animationQueueRef.current = [];
|
|
858
|
+
playedAnimationsRef.current = [];
|
|
859
|
+
// Start playing animations immediately
|
|
860
|
+
playRandomAnimation(animationGroup);
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
pausedAudioDataRef.current = null; // Clear after use
|
|
864
|
+
return;
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
// Otherwise, resume from remaining text (fallback)
|
|
769
868
|
const remainingText = speechProgressRef.current?.remainingText;
|
|
770
869
|
const originalText = speechProgressRef.current?.originalText || pausedSpeechRef.current?.text;
|
|
771
870
|
const originalOptions = speechProgressRef.current?.options || pausedSpeechRef.current?.options || {};
|
|
@@ -775,12 +874,16 @@ const SimpleTalkingAvatar = forwardRef(({
|
|
|
775
874
|
if (textToSpeak) {
|
|
776
875
|
speakText(textToSpeak, originalOptions);
|
|
777
876
|
}
|
|
877
|
+
|
|
878
|
+
// Clear paused audio data
|
|
879
|
+
pausedAudioDataRef.current = null;
|
|
778
880
|
} catch (err) {
|
|
779
881
|
console.warn('Error resuming speech:', err);
|
|
780
882
|
setIsPaused(false);
|
|
781
883
|
isPausedRef.current = false;
|
|
884
|
+
pausedAudioDataRef.current = null;
|
|
782
885
|
}
|
|
783
|
-
}, [isPaused, speakText, resumeAudioContext]);
|
|
886
|
+
}, [isPaused, speakText, resumeAudioContext, autoAnimationGroup, playRandomAnimation]);
|
|
784
887
|
|
|
785
888
|
// Stop speaking
|
|
786
889
|
const stopSpeaking = useCallback(() => {
|
package/src/lib/talkinghead.mjs
CHANGED
|
@@ -835,6 +835,11 @@ class TalkingHead {
|
|
|
835
835
|
this.speechQueue = [];
|
|
836
836
|
this.isSpeaking = false;
|
|
837
837
|
this.isListening = false;
|
|
838
|
+
|
|
839
|
+
// Pause/resume tracking for buffer trimming
|
|
840
|
+
this.audioStartTime = null; // When current audio started playing
|
|
841
|
+
this.currentAudioItem = null; // Current audio item being played
|
|
842
|
+
this.pausedAudioData = null; // Stored trimmed buffer when paused
|
|
838
843
|
|
|
839
844
|
// Setup Google text-to-speech
|
|
840
845
|
if ( this.opt.ttsEndpoint ) {
|
|
@@ -3718,10 +3723,80 @@ class TalkingHead {
|
|
|
3718
3723
|
/**
|
|
3719
3724
|
* Play audio playlist using Web Audio API.
|
|
3720
3725
|
* @param {boolean} [force=false] If true, forces to proceed
|
|
3726
|
+
* @param {Object} [pausedAudioData=null] Trimmed audio data from pause to resume from exact position
|
|
3721
3727
|
*/
|
|
3722
|
-
async playAudio(force=false) {
|
|
3728
|
+
async playAudio(force=false, pausedAudioData=null) {
|
|
3723
3729
|
if ( !this.armature || (this.isAudioPlaying && !force) ) return;
|
|
3724
3730
|
this.isAudioPlaying = true;
|
|
3731
|
+
|
|
3732
|
+
// If we have paused audio data, play that first (resume from exact position)
|
|
3733
|
+
if (pausedAudioData && pausedAudioData.audio) {
|
|
3734
|
+
const item = {
|
|
3735
|
+
audio: pausedAudioData.audio,
|
|
3736
|
+
anim: pausedAudioData.anim,
|
|
3737
|
+
text: pausedAudioData.text,
|
|
3738
|
+
delay: pausedAudioData.delay || 0,
|
|
3739
|
+
isRaw: false
|
|
3740
|
+
};
|
|
3741
|
+
|
|
3742
|
+
// If Web Audio API is suspended, try to resume it
|
|
3743
|
+
if ( this.audioCtx.state === "suspended" || this.audioCtx.state === "interrupted" ) {
|
|
3744
|
+
const resume = this.audioCtx.resume();
|
|
3745
|
+
const timeout = new Promise((_r, rej) => setTimeout(() => rej("p2"), 1000));
|
|
3746
|
+
try {
|
|
3747
|
+
await Promise.race([resume, timeout]);
|
|
3748
|
+
} catch(e) {
|
|
3749
|
+
console.log("Can't play audio. Web Audio API suspended.");
|
|
3750
|
+
this.playAudio(true);
|
|
3751
|
+
return;
|
|
3752
|
+
}
|
|
3753
|
+
}
|
|
3754
|
+
|
|
3755
|
+
// Store current audio item and start time
|
|
3756
|
+
this.currentAudioItem = {
|
|
3757
|
+
audio: item.audio,
|
|
3758
|
+
anim: item.anim ? JSON.parse(JSON.stringify(item.anim)) : null,
|
|
3759
|
+
text: item.text,
|
|
3760
|
+
delay: item.delay
|
|
3761
|
+
};
|
|
3762
|
+
|
|
3763
|
+
// Create audio source
|
|
3764
|
+
this.audioSpeechSource = this.audioCtx.createBufferSource();
|
|
3765
|
+
this.audioSpeechSource.buffer = item.audio;
|
|
3766
|
+
this.audioSpeechSource.playbackRate.value = 1 / this.animSlowdownRate;
|
|
3767
|
+
this.audioSpeechSource.connect(this.audioAnalyzerNode);
|
|
3768
|
+
|
|
3769
|
+
const startDelay = item.delay / 1000;
|
|
3770
|
+
this.audioStartTime = this.audioCtx.currentTime + startDelay;
|
|
3771
|
+
|
|
3772
|
+
this.audioSpeechSource.addEventListener('ended', () => {
|
|
3773
|
+
this.audioSpeechSource.disconnect();
|
|
3774
|
+
this.audioStartTime = null;
|
|
3775
|
+
this.currentAudioItem = null;
|
|
3776
|
+
// Ensure isSpeaking is true so startSpeaking() processes the queue after trimmed audio
|
|
3777
|
+
this.isSpeaking = true;
|
|
3778
|
+
this.playAudio(true);
|
|
3779
|
+
}, { once: true });
|
|
3780
|
+
|
|
3781
|
+
// Push trimmed animation data to queue
|
|
3782
|
+
if ( item.anim && item.anim.length > 0 ) {
|
|
3783
|
+
item.anim.forEach( animGroup => {
|
|
3784
|
+
if (animGroup && animGroup.ts && animGroup.ts.length > 0) {
|
|
3785
|
+
const animData = {
|
|
3786
|
+
template: animGroup.template,
|
|
3787
|
+
ts: animGroup.ts.map(ts => this.animClock + ts),
|
|
3788
|
+
vs: animGroup.vs
|
|
3789
|
+
};
|
|
3790
|
+
this.animQueue.push(animData);
|
|
3791
|
+
}
|
|
3792
|
+
});
|
|
3793
|
+
}
|
|
3794
|
+
|
|
3795
|
+
// Play immediately (no delay for resumed audio)
|
|
3796
|
+
this.audioSpeechSource.start(startDelay);
|
|
3797
|
+
return;
|
|
3798
|
+
}
|
|
3799
|
+
|
|
3725
3800
|
if ( this.audioPlaylist.length ) {
|
|
3726
3801
|
const item = this.audioPlaylist.shift();
|
|
3727
3802
|
|
|
@@ -3748,23 +3823,45 @@ class TalkingHead {
|
|
|
3748
3823
|
audio = item.audio;
|
|
3749
3824
|
}
|
|
3750
3825
|
|
|
3826
|
+
// Store current audio item and start time for pause/resume tracking
|
|
3827
|
+
this.currentAudioItem = {
|
|
3828
|
+
audio: audio,
|
|
3829
|
+
anim: item.anim ? JSON.parse(JSON.stringify(item.anim)) : null, // Deep copy
|
|
3830
|
+
text: item.text,
|
|
3831
|
+
delay: 0
|
|
3832
|
+
};
|
|
3833
|
+
|
|
3834
|
+
// Calculate delay for pre-animations
|
|
3835
|
+
let delay = 0;
|
|
3836
|
+
if ( item.anim ) {
|
|
3837
|
+
// Find the lowest negative time point, if any
|
|
3838
|
+
if ( !item.isRaw ) {
|
|
3839
|
+
delay = Math.abs(Math.min(0, ...item.anim.map( x => Math.min(...x.ts) ) ) );
|
|
3840
|
+
}
|
|
3841
|
+
this.currentAudioItem.delay = delay;
|
|
3842
|
+
}
|
|
3843
|
+
|
|
3751
3844
|
// Create audio source
|
|
3752
3845
|
this.audioSpeechSource = this.audioCtx.createBufferSource();
|
|
3753
3846
|
this.audioSpeechSource.buffer = audio;
|
|
3754
3847
|
this.audioSpeechSource.playbackRate.value = 1 / this.animSlowdownRate;
|
|
3755
3848
|
this.audioSpeechSource.connect(this.audioAnalyzerNode);
|
|
3849
|
+
|
|
3850
|
+
// Track when audio starts playing (accounting for delay)
|
|
3851
|
+
const startDelay = delay / 1000;
|
|
3852
|
+
this.audioStartTime = this.audioCtx.currentTime + startDelay;
|
|
3853
|
+
|
|
3756
3854
|
this.audioSpeechSource.addEventListener('ended', () => {
|
|
3757
3855
|
this.audioSpeechSource.disconnect();
|
|
3856
|
+
this.audioStartTime = null;
|
|
3857
|
+
this.currentAudioItem = null;
|
|
3858
|
+
// Ensure isSpeaking is true so startSpeaking() processes the queue
|
|
3859
|
+
this.isSpeaking = true;
|
|
3758
3860
|
this.playAudio(true);
|
|
3759
3861
|
}, { once: true });
|
|
3760
3862
|
|
|
3761
3863
|
// Rescale lipsync and push to queue
|
|
3762
|
-
let delay = 0;
|
|
3763
3864
|
if ( item.anim ) {
|
|
3764
|
-
// Find the lowest negative time point, if any
|
|
3765
|
-
if ( !item.isRaw ) {
|
|
3766
|
-
delay = Math.abs(Math.min(0, ...item.anim.map( x => Math.min(...x.ts) ) ) );
|
|
3767
|
-
}
|
|
3768
3865
|
item.anim.forEach( x => {
|
|
3769
3866
|
for(let i=0; i<x.ts.length; i++) {
|
|
3770
3867
|
x.ts[i] = this.animClock + x.ts[i] + delay;
|
|
@@ -3773,8 +3870,8 @@ class TalkingHead {
|
|
|
3773
3870
|
});
|
|
3774
3871
|
}
|
|
3775
3872
|
|
|
3776
|
-
// Play,
|
|
3777
|
-
this.audioSpeechSource.start(
|
|
3873
|
+
// Play, delay in seconds so pre-animations can be played
|
|
3874
|
+
this.audioSpeechSource.start(startDelay);
|
|
3778
3875
|
|
|
3779
3876
|
} else {
|
|
3780
3877
|
this.isAudioPlaying = false;
|
|
@@ -4433,18 +4530,109 @@ class TalkingHead {
|
|
|
4433
4530
|
|
|
4434
4531
|
/**
|
|
4435
4532
|
* Pause speaking.
|
|
4533
|
+
* Returns paused audio data with trimmed buffer if audio was playing.
|
|
4436
4534
|
*/
|
|
4437
4535
|
pauseSpeaking() {
|
|
4438
|
-
|
|
4536
|
+
let pausedData = null;
|
|
4537
|
+
|
|
4538
|
+
// If audio is currently playing, calculate elapsed time and trim buffer
|
|
4539
|
+
if (this.audioSpeechSource && this.currentAudioItem && this.audioStartTime !== null) {
|
|
4540
|
+
try {
|
|
4541
|
+
const currentTime = this.audioCtx.currentTime;
|
|
4542
|
+
const elapsedTime = Math.max(0, currentTime - this.audioStartTime);
|
|
4543
|
+
const playbackRate = this.audioSpeechSource.playbackRate.value;
|
|
4544
|
+
const elapsedInBuffer = elapsedTime * playbackRate;
|
|
4545
|
+
|
|
4546
|
+
const originalBuffer = this.currentAudioItem.audio;
|
|
4547
|
+
const sampleRate = originalBuffer.sampleRate;
|
|
4548
|
+
const startSample = Math.floor(elapsedInBuffer * sampleRate);
|
|
4549
|
+
|
|
4550
|
+
// Only trim if we haven't played the entire buffer
|
|
4551
|
+
if (startSample < originalBuffer.length) {
|
|
4552
|
+
// Create trimmed buffer
|
|
4553
|
+
const trimmedLength = originalBuffer.length - startSample;
|
|
4554
|
+
const trimmedBuffer = this.audioCtx.createBuffer(
|
|
4555
|
+
originalBuffer.numberOfChannels,
|
|
4556
|
+
trimmedLength,
|
|
4557
|
+
sampleRate
|
|
4558
|
+
);
|
|
4559
|
+
|
|
4560
|
+
// Copy remaining samples
|
|
4561
|
+
for (let channel = 0; channel < originalBuffer.numberOfChannels; channel++) {
|
|
4562
|
+
const originalData = originalBuffer.getChannelData(channel);
|
|
4563
|
+
const trimmedData = trimmedBuffer.getChannelData(channel);
|
|
4564
|
+
for (let i = 0; i < trimmedLength; i++) {
|
|
4565
|
+
trimmedData[i] = originalData[startSample + i];
|
|
4566
|
+
}
|
|
4567
|
+
}
|
|
4568
|
+
|
|
4569
|
+
// Trim animation data (lip-sync) - adjust timestamps
|
|
4570
|
+
let trimmedAnim = null;
|
|
4571
|
+
if (this.currentAudioItem.anim) {
|
|
4572
|
+
// Calculate the absolute time when this audio started (for comparison)
|
|
4573
|
+
const audioStartAnimTime = this.animClock + this.currentAudioItem.delay;
|
|
4574
|
+
const elapsedAnimTime = elapsedTime * 1000; // Convert to ms
|
|
4575
|
+
const currentAnimTime = audioStartAnimTime + elapsedAnimTime;
|
|
4576
|
+
|
|
4577
|
+
trimmedAnim = this.currentAudioItem.anim.map(animGroup => {
|
|
4578
|
+
const trimmed = {
|
|
4579
|
+
template: animGroup.template,
|
|
4580
|
+
ts: [],
|
|
4581
|
+
vs: []
|
|
4582
|
+
};
|
|
4583
|
+
|
|
4584
|
+
// Find animations that haven't started yet
|
|
4585
|
+
// animGroup.ts contains absolute timestamps (already adjusted to animClock)
|
|
4586
|
+
for (let i = 0; i < animGroup.ts.length; i++) {
|
|
4587
|
+
const animTimestamp = animGroup.ts[i];
|
|
4588
|
+
|
|
4589
|
+
// If animation timestamp is in the future (hasn't happened yet)
|
|
4590
|
+
if (animTimestamp > currentAnimTime) {
|
|
4591
|
+
// Adjust to relative time from resume point (start from 0)
|
|
4592
|
+
const relativeTime = animTimestamp - currentAnimTime;
|
|
4593
|
+
trimmed.ts.push(relativeTime);
|
|
4594
|
+
trimmed.vs.push(animGroup.vs[i]);
|
|
4595
|
+
}
|
|
4596
|
+
}
|
|
4597
|
+
|
|
4598
|
+
return trimmed.ts.length > 0 ? trimmed : null;
|
|
4599
|
+
}).filter(x => x !== null);
|
|
4600
|
+
}
|
|
4601
|
+
|
|
4602
|
+
pausedData = {
|
|
4603
|
+
audio: trimmedBuffer,
|
|
4604
|
+
anim: trimmedAnim,
|
|
4605
|
+
text: this.currentAudioItem.text,
|
|
4606
|
+
delay: 0, // No delay needed for trimmed buffer
|
|
4607
|
+
elapsedTime: elapsedTime
|
|
4608
|
+
};
|
|
4609
|
+
}
|
|
4610
|
+
|
|
4611
|
+
this.audioSpeechSource.stop();
|
|
4612
|
+
} catch(error) {
|
|
4613
|
+
console.warn('Error trimming audio buffer on pause:', error);
|
|
4614
|
+
}
|
|
4615
|
+
} else {
|
|
4616
|
+
// No audio playing, just stop if source exists
|
|
4617
|
+
try { this.audioSpeechSource?.stop(); } catch(error) {}
|
|
4618
|
+
}
|
|
4619
|
+
|
|
4439
4620
|
this.audioPlaylist.length = 0;
|
|
4440
4621
|
this.stateName = 'idle';
|
|
4441
4622
|
this.isSpeaking = false;
|
|
4442
4623
|
this.isAudioPlaying = false;
|
|
4624
|
+
this.audioStartTime = null;
|
|
4625
|
+
this.currentAudioItem = null;
|
|
4626
|
+
|
|
4627
|
+
// Clear viseme animations but keep others
|
|
4443
4628
|
this.animQueue = this.animQueue.filter( x => x.template.name !== 'viseme' && x.template.name !== 'subtitles' && x.template.name !== 'blendshapes' );
|
|
4629
|
+
|
|
4444
4630
|
if ( this.armature ) {
|
|
4445
4631
|
this.resetLips();
|
|
4446
4632
|
this.render();
|
|
4447
4633
|
}
|
|
4634
|
+
|
|
4635
|
+
return pausedData;
|
|
4448
4636
|
}
|
|
4449
4637
|
|
|
4450
4638
|
/**
|