@memori.ai/memori-react 7.16.2 → 7.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +33 -0
  2. package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.d.ts +27 -0
  3. package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js +52 -0
  4. package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js.map +1 -0
  5. package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
  6. package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js +7 -7
  7. package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js.map +1 -1
  8. package/dist/components/Avatar/AvatarView/index.js +2 -3
  9. package/dist/components/Avatar/AvatarView/index.js.map +1 -1
  10. package/dist/components/ChatTextArea/ChatTextArea.css +55 -60
  11. package/dist/components/MemoriWidget/MemoriWidget.js +215 -138
  12. package/dist/components/MemoriWidget/MemoriWidget.js.map +1 -1
  13. package/dist/components/SettingsDrawer/SettingsDrawer.css +5 -0
  14. package/dist/components/SettingsDrawer/SettingsDrawer.d.ts +2 -1
  15. package/dist/components/SettingsDrawer/SettingsDrawer.js +6 -3
  16. package/dist/components/SettingsDrawer/SettingsDrawer.js.map +1 -1
  17. package/dist/components/UploadButton/UploadButton.d.ts +5 -0
  18. package/dist/components/UploadButton/UploadButton.js +49 -48
  19. package/dist/components/UploadButton/UploadButton.js.map +1 -1
  20. package/dist/components/ui/Slider.css +59 -44
  21. package/dist/context/visemeContext.d.ts +1 -1
  22. package/dist/context/visemeContext.js +2 -2
  23. package/dist/context/visemeContext.js.map +1 -1
  24. package/dist/locales/de.json +1 -0
  25. package/dist/locales/en.json +1 -0
  26. package/dist/locales/es.json +1 -0
  27. package/dist/locales/fr.json +1 -0
  28. package/dist/locales/it.json +1 -0
  29. package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.d.ts +27 -0
  30. package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js +48 -0
  31. package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js.map +1 -0
  32. package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
  33. package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js +7 -7
  34. package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js.map +1 -1
  35. package/esm/components/Avatar/AvatarView/index.js +3 -4
  36. package/esm/components/Avatar/AvatarView/index.js.map +1 -1
  37. package/esm/components/ChatTextArea/ChatTextArea.css +55 -60
  38. package/esm/components/MemoriWidget/MemoriWidget.js +216 -139
  39. package/esm/components/MemoriWidget/MemoriWidget.js.map +1 -1
  40. package/esm/components/SettingsDrawer/SettingsDrawer.css +5 -0
  41. package/esm/components/SettingsDrawer/SettingsDrawer.d.ts +2 -1
  42. package/esm/components/SettingsDrawer/SettingsDrawer.js +6 -3
  43. package/esm/components/SettingsDrawer/SettingsDrawer.js.map +1 -1
  44. package/esm/components/UploadButton/UploadButton.d.ts +5 -0
  45. package/esm/components/UploadButton/UploadButton.js +50 -49
  46. package/esm/components/UploadButton/UploadButton.js.map +1 -1
  47. package/esm/components/ui/Slider.css +59 -44
  48. package/esm/context/visemeContext.d.ts +1 -1
  49. package/esm/context/visemeContext.js +2 -2
  50. package/esm/context/visemeContext.js.map +1 -1
  51. package/esm/locales/de.json +1 -0
  52. package/esm/locales/en.json +1 -0
  53. package/esm/locales/es.json +1 -0
  54. package/esm/locales/fr.json +1 -0
  55. package/esm/locales/it.json +1 -0
  56. package/package.json +1 -2
  57. package/src/components/Avatar/AvatarView/AvatarComponent/lights/Lights.tsx +145 -0
  58. package/src/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
  59. package/src/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.tsx +6 -14
  60. package/src/components/Avatar/AvatarView/index.tsx +5 -14
  61. package/src/components/ChatTextArea/ChatTextArea.css +55 -60
  62. package/src/components/MemoriWidget/MemoriWidget.tsx +337 -187
  63. package/src/components/SettingsDrawer/SettingsDrawer.css +5 -0
  64. package/src/components/SettingsDrawer/SettingsDrawer.tsx +29 -11
  65. package/src/components/UploadButton/UploadButton.tsx +139 -118
  66. package/src/components/UploadButton/__snapshots__/UploadButton.test.tsx.snap +3 -52
  67. package/src/components/ui/Slider.css +59 -44
  68. package/src/context/visemeContext.tsx +2 -2
  69. package/src/locales/de.json +1 -0
  70. package/src/locales/en.json +1 -0
  71. package/src/locales/es.json +1 -0
  72. package/src/locales/fr.json +1 -0
  73. package/src/locales/it.json +1 -0
The hunks below are from package/src/components/MemoriWidget/MemoriWidget.tsx (+337 -187); the dist and esm bundles receive the corresponding compiled changes.

@@ -31,7 +31,11 @@ import React, {
 } from 'react';
 import { useTranslation } from 'react-i18next';
 import memoriApiClient from '@memori.ai/memori-api-client';
-import { AudioContext, IAudioContext } from 'standardized-audio-context';
+import {
+  AudioContext,
+  IAudioBufferSourceNode,
+  IAudioContext,
+} from 'standardized-audio-context';
 import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk';
 import cx from 'classnames';
 import { DateTime } from 'luxon';
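A note on the widened import: standardized-audio-context ships interface types alongside its cross-browser `AudioContext` implementation, and 7.17.0 now pulls in `IAudioBufferSourceNode` as well. A minimal sketch of how the three fit together, assuming the library's generic node interfaces (variable names are illustrative, not from the package):

```ts
import {
  AudioContext,
  IAudioBufferSourceNode,
  IAudioContext,
} from 'standardized-audio-context';

// The library's AudioContext implements the IAudioContext interface.
const ctx: IAudioContext = new AudioContext();

// createBufferSource() returns the library's buffer-source interface,
// parameterized by the context type, so a long-lived reference can be
// typed precisely instead of falling back to `any`:
let currentSource: IAudioBufferSourceNode<IAudioContext> | null = null;
currentSource = ctx.createBufferSource();
```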
@@ -596,7 +600,6 @@ const MemoriWidget = ({
   } = useViseme();

   useEffect(() => {
-    setIsPlayingAudio(!!speechSynthesizer);
     memoriSpeaking = !!speechSynthesizer;
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [speechSynthesizer]);
@@ -624,20 +627,17 @@ const MemoriWidget = ({
       defaultControlsPosition = 'bottom';
     }

-    setMuteSpeaker(
-      autoStart ||
-        getLocalConfig(
-          'muteSpeaker',
-          !defaultEnableAudio || !defaultSpeakerActive || autoStart
-        )
-    );
-    speakerMuted =
+    const muteSpeaker =
       autoStart ||
       getLocalConfig(
         'muteSpeaker',
         !defaultEnableAudio || !defaultSpeakerActive || autoStart
       );
-    setContinuousSpeech(microphoneMode === 'CONTINUOUS');
+
+    setMuteSpeaker(muteSpeaker);
+    speakerMuted = muteSpeaker;
+
+    setContinuousSpeech(muteSpeaker ? false : microphoneMode === 'CONTINUOUS');
     setContinuousSpeechTimeout(getLocalConfig('continuousSpeechTimeout', 2));
     setControlsPosition(
       getLocalConfig('controlsPosition', defaultControlsPosition)
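The hunk above folds the previously duplicated mute computation into a single `muteSpeaker` local and, new in 7.17.0, turns continuous speech off whenever the speaker ends up muted. A standalone reduction of that decision logic, assuming `getLocalConfig(key, fallback)` returns the persisted value or the fallback (a sketch, not the package's code):

```ts
function resolveMuteSpeaker(opts: {
  autoStart: boolean;
  defaultEnableAudio: boolean;
  defaultSpeakerActive: boolean;
  stored?: boolean; // persisted 'muteSpeaker' config, if any
}): boolean {
  // The fallback mutes when audio or the speaker is disabled by default,
  // or when the widget auto-starts.
  const fallback =
    !opts.defaultEnableAudio || !opts.defaultSpeakerActive || opts.autoStart;
  // autoStart always wins; otherwise the stored config takes precedence.
  return opts.autoStart || (opts.stored ?? fallback);
}

// Continuous speech is only honored while the speaker is audible:
const continuousSpeech = (muted: boolean, microphoneMode: string): boolean =>
  muted ? false : microphoneMode === 'CONTINUOUS';
```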
@@ -1971,15 +1971,24 @@ const MemoriWidget = ({
   };

   const speak = (text: string): void => {
+    console.debug('speak called with text:', text);
+
     if (!AZURE_COGNITIVE_SERVICES_TTS_KEY || preview) {
+      console.debug('No TTS key or preview mode, emitting end speak event');
       emitEndSpeakEvent();
       return;
     }
+
+    console.debug('Stopping listening before speaking');
     stopListening();

-    if (preview) return;
+    if (preview) {
+      console.debug('Preview mode, returning early');
+      return;
+    }

     if (speakerMuted) {
+      console.debug('Speaker muted, skipping speech synthesis');
       memoriSpeaking = false;
       setMemoriTyping(false);

@@ -1987,29 +1996,40 @@ const MemoriWidget = ({

       // trigger start continuous listening if set, see MemoriChat
       if (continuousSpeech) {
+        console.debug('Setting listening timeout for continuous speech');
         setListeningTimeout();
       }
       return;
     }

-    if (audioDestination) audioDestination.pause();
+    if (audioDestination) {
+      console.debug('Pausing existing audio destination');
+      audioDestination.pause();
+    }

     let isSafari =
       window.navigator.userAgent.includes('Safari') &&
       !window.navigator.userAgent.includes('Chrome');
     let isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);

+    console.debug('Browser detection - Safari:', isSafari, 'iOS:', isIOS);
+
     if ((audioContext.state as string) === 'interrupted') {
+      console.debug('Audio context interrupted, attempting resume');
       audioContext.resume().then(() => speak(text));
       return;
     }
     if (audioContext.state === 'closed') {
+      console.debug('Audio context closed, creating new context');
       audioContext = new AudioContext();
       let buffer = audioContext.createBuffer(1, 10000, 22050);
       let source = audioContext.createBufferSource();
       source.buffer = buffer;
       source.connect(audioContext.destination);
     } else if (audioContext.state === 'suspended') {
+      console.debug(
+        'Audio context suspended, stopping audio and creating new context'
+      );
       stopAudio();

       audioContext = new AudioContext();
@@ -2020,23 +2040,17 @@ const MemoriWidget = ({
     }

     if (!speechSynthesizer) {
-      if (!isIOS) {
-        audioDestination = new speechSdk.SpeakerAudioDestination();
-      }
-      let audioConfig =
-        speechSdk.AudioConfig.fromSpeakerOutput(audioDestination);
-      speechSynthesizer = new speechSdk.SpeechSynthesizer(
-        speechConfig,
-        audioConfig
-      );
+      initializeTTS();
     }

     const source = audioContext.createBufferSource();
     source.addEventListener('ended', () => {
+      console.debug('Audio source ended');
       setIsPlayingAudio(false);
       memoriSpeaking = false;
     });
     audioDestination.onAudioEnd = () => {
+      console.debug('Audio destination ended');
       setIsPlayingAudio(false);
       memoriSpeaking = false;
       source.disconnect();
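The inline synthesizer setup deleted above is replaced by a call to a new `initializeTTS()` helper that this diff does not show in full. Reconstructed from the removed lines, it plausibly looks like the following (a sketch; the actual helper elsewhere in MemoriWidget.tsx may differ, e.g. in how it detects iOS):

```ts
const initializeTTS = (): void => {
  const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);

  // On non-iOS browsers, route synthesis through a SpeakerAudioDestination
  // so playback can be paused and closed independently of the synthesizer.
  if (!isIOS) {
    audioDestination = new speechSdk.SpeakerAudioDestination();
  }
  const audioConfig = speechSdk.AudioConfig.fromSpeakerOutput(audioDestination);
  speechSynthesizer = new speechSdk.SpeechSynthesizer(speechConfig, audioConfig);
};
```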
@@ -2048,100 +2062,134 @@ const MemoriWidget = ({
     };

     // Clear any existing visemes before starting new speech
+    console.debug('Resetting viseme queue');
     resetVisemeQueue();

     // Set up the viseme event handler
-    speechSynthesizer.visemeReceived = function (_, e) {
-      addViseme(e.visemeId, e.audioOffset);
-    };
+    if (speechSynthesizer) {
+      speechSynthesizer.visemeReceived = function (_, e) {
+        console.debug(
+          'Viseme received:',
+          e.visemeId,
+          'at offset:',
+          e.audioOffset
+        );
+        addViseme(e.visemeId, e.audioOffset);
+      };
+    }

     // Set up viseme handling
     const textToSpeak = escapeHTML(
       stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text))))
     );
+    console.debug('Processed text to speak:', textToSpeak);

-    speechSynthesizer.speakSsmlAsync(
-      `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" xml:lang="${getCultureCodeByLanguage(
-        userLang
-      )}"><voice name="${getTTSVoice(userLang)}"><s>${replaceTextWithPhonemes(
-        textToSpeak,
-        userLang.toLowerCase()
-      )}</s></voice></speak>`,
-      result => {
-        if (result) {
-          setIsPlayingAudio(true);
-          memoriSpeaking = true;
-
-          // Process the viseme data
-          startProcessing(audioContext);
+    setTimeout(() => {
+      if (speechSynthesizer) {
+        console.debug('Starting speech synthesis');
+        speechSynthesizer.speakSsmlAsync(
+          `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" xml:lang="${getCultureCodeByLanguage(
+            userLang
+          )}"><voice name="${getTTSVoice(
+            userLang
+          )}"><s>${replaceTextWithPhonemes(
+            textToSpeak,
+            userLang.toLowerCase()
+          )}</s></voice></speak>`,
+          result => {
+            if (result) {
+              console.debug('Speech synthesis successful');
+              setIsPlayingAudio(true);
+              memoriSpeaking = true;
+
+              // Process the viseme data
+              startProcessing(audioContext);

-          try {
-            // Decode the audio data
-            audioContext.decodeAudioData(result.audioData, function (buffer) {
-              source.buffer = buffer;
-              source.connect(audioContext.destination);
+              try {
+                // Decode the audio data
+                console.debug('Decoding audio data');
+                audioContext.decodeAudioData(
+                  result.audioData,
+                  function (buffer) {
+                    console.debug('Audio data decoded successfully');
+                    source.buffer = buffer;
+                    source.connect(audioContext.destination);
+
+                    if (history.length < 1 || (isSafari && isIOS)) {
+                      console.debug('Starting audio playback');
+                      source.start(0);
+                    }
+                  }
+                );

-              if (history.length < 1 || (isSafari && isIOS)) {
-                source.start(0);
-              }
-            });
+                // Handle the audio context state changes
+                audioContext.onstatechange = () => {
+                  console.debug(
+                    'Audio context state changed to:',
+                    audioContext.state
+                  );
+                  if (
+                    audioContext.state === 'suspended' ||
+                    audioContext.state === 'closed'
+                  ) {
+                    source.disconnect();
+                    setIsPlayingAudio(false);
+                    stopProcessing();
+                    resetVisemeQueue();
+                    memoriSpeaking = false;
+                  } else if ((audioContext.state as string) === 'interrupted') {
+                    audioContext.resume();
+                  }
+                };

-            // Handle the audio context state changes
-            audioContext.onstatechange = () => {
-              if (
-                audioContext.state === 'suspended' ||
-                audioContext.state === 'closed'
-              ) {
-                source.disconnect();
-                setIsPlayingAudio(false);
-                stopProcessing();
-                resetVisemeQueue();
-                memoriSpeaking = false;
-              } else if ((audioContext.state as string) === 'interrupted') {
                 audioContext.resume();
-              }
-            };
-
-            audioContext.resume();

-            if (speechSynthesizer) {
-              speechSynthesizer.close();
-              speechSynthesizer = null;
+                if (speechSynthesizer) {
+                  console.debug('Closing speech synthesizer');
+                  speechSynthesizer.close();
+                  speechSynthesizer = null;
+                }
+              } catch (error) {
+                console.error('Error processing audio data:', error);
+                handleFallback(text);
+              }
+            } else {
+              console.debug('No result from speech synthesis, using fallback');
+              handleFallback(text);
             }
-          } catch (error) {
-            console.error('Error processing audio data:', error);
+          },
+          error => {
+            console.error('Speak error:', error);
             handleFallback(text);
           }
-        } else {
-          handleFallback(text);
-        }
-      },
-      error => {
-        console.error('Speak error:', error);
-        handleFallback(text);
+        );
       }
-    );
-
+    }, 100);
     setMemoriTyping(false);
   };

   // Helper function for fallback behavior
   const handleFallback = (text: string) => {
-    console.log('Falling back to browser speech synthesis');
     window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
     cleanup();
   };

-  // Modify cleanup to include speech state reset
-  const cleanup = () => {
+  const cleanup = (): void => {
     setIsPlayingAudio(false);
     stopProcessing();
     resetVisemeQueue();
     memoriSpeaking = false;

-    if (speechSynthesizer) {
-      console.log('Closing speech synthesizer');
-      speechSynthesizer.close();
+    try {
+      if (speechSynthesizer) {
+        const currentSynthesizer = speechSynthesizer;
+        speechSynthesizer = null; // Clear reference first
+        console.debug('Closing speech synthesizer');
+        currentSynthesizer.close();
+      }
+    } catch (error) {
+      console.debug('Error during synthesizer cleanup:', error);
+      // Even if close fails, ensure synthesizer is nullified
       speechSynthesizer = null;
     }

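The reworked `cleanup` nulls the shared `speechSynthesizer` reference before calling `close()`, so re-entrant callers see the synthesizer as already gone even if `close()` throws. The same idiom in isolation (illustrative names only):

```ts
let resource: { close(): void } | null = { close() {} };

function release(): void {
  const current = resource;
  resource = null; // clear the shared reference first
  try {
    current?.close();
  } catch (e) {
    // a failing close() must not resurrect the reference
    console.debug('close failed:', e);
  }
}
```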
@@ -2149,18 +2197,25 @@ const MemoriWidget = ({
   };

   // Modify stopAudio to include speech state reset
-  const stopAudio = () => {
+  const stopAudio = async (): Promise<void> => {
     setIsPlayingAudio(false);
     memoriSpeaking = false;

     try {
       if (speechSynthesizer) {
-        speechSynthesizer.close();
+        const currentSynthesizer = speechSynthesizer;
         speechSynthesizer = null;
+        try {
+          currentSynthesizer.close();
+        } catch (e) {
+          console.debug('Error closing speech synthesizer:', e);
+        }
       }
-      if (audioContext.state !== 'closed') {
+
+      if (audioContext?.state !== 'closed') {
         audioContext.close();
       }
+
       if (audioDestination) {
         audioDestination.pause();
         audioDestination.close();
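`stopAudio` now guards the context with optional chaining before closing it. For reference, the underlying Web Audio idiom as a hypothetical helper (not in the package): calling `close()` on an already-closed `AudioContext` rejects with an `InvalidStateError`, so the state is checked first.

```ts
async function closeAudioContextSafely(ctx?: AudioContext): Promise<void> {
  if (ctx && ctx.state !== 'closed') {
    await ctx.close(); // resolves once the context has released its resources
  }
}
```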
@@ -2169,7 +2224,6 @@ const MemoriWidget = ({
       console.debug('stopAudio error: ', e);
     }
   };
-
   const focusChatInput = () => {
     let textarea = document.querySelector(
       '#chat-fieldset textarea'
@@ -2192,126 +2246,192 @@ const MemoriWidget = ({
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [currentDialogState?.emission]);

-  /**
-   * Speech recognition and transcript management
-   */
   const [transcript, setTranscript] = useState('');
-  const resetTranscript = () => setTranscript('');
-
-  /**
-   * Listening transcript timeout
-   */
   const [transcriptTimeout, setTranscriptTimeout] =
     useState<NodeJS.Timeout | null>(null);
+  const [isSpeaking, setIsSpeaking] = useState(false);
+  // const [isProcessingSTT, setIsProcessingSTT] = useState(false);
+
+  const resetTranscript = () => {
+    setTranscript('');
+    // setIsProcessingSTT(false);
+  };
+
   const setListeningTimeout = () => {
-    let timeout = setTimeout(async () => {
-      clearListening();
-      const message = stripDuplicates(transcript);
-      if (message.length > 0 && listening) {
-        sendMessage(message);
-        resetTranscript();
-        setUserMessage('');
-      } else if (listening) {
-        resetInteractionTimeout();
-      }
-    }, continuousSpeechTimeout * 1000);
-    setTranscriptTimeout(timeout);
+    clearListeningTimeout();
+    const timeout = setTimeout(
+      handleTranscriptProcessing,
+      continuousSpeechTimeout * 1000 + 300
+    );
+    setTranscriptTimeout(timeout as unknown as NodeJS.Timeout);
   };
+
   const clearListeningTimeout = () => {
     if (transcriptTimeout) {
       clearTimeout(transcriptTimeout);
       setTranscriptTimeout(null);
     }
   };
+
   const resetListeningTimeout = () => {
     clearListeningTimeout();
-    if (continuousSpeech) setListeningTimeout();
+    if (continuousSpeech) {
+      setListeningTimeout();
+    }
   };
+  // Modified useEffect to handle transcript changes
   useEffect(() => {
-    resetListeningTimeout();
-    resetInteractionTimeout();
+    if (!isSpeaking) {
+      resetListeningTimeout();
+      resetInteractionTimeout();
+    }
+  }, [transcript, isSpeaking]);

-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [transcript]);
+  // Clean up function for component unmount
+  useEffect(() => {
+    return () => {
+      clearListeningTimeout();
+    };
+  }, []);

   /**
    * Listening methods
    */
-  const startListening = async () => {
-    if (!AZURE_COGNITIVE_SERVICES_TTS_KEY) return;
+  /**
+   * Starts speech recognition using Azure Cognitive Services
+   * Sets up recognizer and begins continuous recognition
+   */
+  const startListening = async (): Promise<void> => {
+    if (!AZURE_COGNITIVE_SERVICES_TTS_KEY) {
+      throw new Error('No TTS key available');
+    }

-    clearListening();
-    setTranscript('');
-    resetTranscript();
+    if (!sessionId) {
+      throw new Error('No session ID available');
+    }

-    // remove focus on chat input if the user is on mobile
-    if (hasTouchscreen()) setEnableFocusChatInput(false);
+    // Ensure complete cleanup before starting, if it's already listening, stop it
+    cleanup();
+    resetTranscript();

     try {
-      navigator.mediaDevices
-        .getUserMedia({ audio: true })
-        .then(function (_stream) {
-          setHasUserActivatedListening(true);
-
-          if (!speechConfig) {
-            speechConfig = speechSdk.SpeechConfig.fromSubscription(
-              AZURE_COGNITIVE_SERVICES_TTS_KEY,
-              'westeurope'
-            );
-            speechConfig.speechRecognitionLanguage =
-              getCultureCodeByLanguage(userLang);
-            speechConfig.speechSynthesisLanguage =
-              getCultureCodeByLanguage(userLang);
-            speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
-          }
+      // Add delay to ensure previous instance is fully cleaned up
+      // await new Promise(resolve => setTimeout(resolve, 300));

-          const audioConfig =
-            speechSdk.AudioConfig.fromDefaultMicrophoneInput();
-          recognizer = new speechSdk.SpeechRecognizer(
-            speechConfig,
-            audioConfig
-          );
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      setHasUserActivatedListening(true);

-          setListening(true);
-          recognizer.recognized = (_s, e) => {
-            if (!e.result.text) return;
-            if (e.result.reason === speechSdk.ResultReason.RecognizedSpeech) {
-              let transcript = e.result.text;
-              setTranscript(transcript || '');
-              if (transcript?.length > 0) {
-                const transcriptMessage = stripDuplicates(transcript);
-                if (transcriptMessage.length > 0)
-                  setUserMessage(msg => `${msg} ${transcriptMessage}`);
-              }
-            } else if (e.result.reason === speechSdk.ResultReason.NoMatch) {
-              console.debug('NOMATCH: Speech could not be recognized.');
-            }
-          };
-          recognizer.canceled = (_s, e) => {
-            if (e.reason === speechSdk.CancellationReason.Error) {
-              console.debug(`"CANCELED: ErrorCode=${e.errorCode}`);
-              console.debug(`"CANCELED: ErrorDetails=${e.errorDetails}`);
-              console.debug(
-                'CANCELED: Did you set the speech resource key and region values?'
-              );
-            }
+      // Recreate speech config each time
+      speechConfig = setupSpeechConfig(AZURE_COGNITIVE_SERVICES_TTS_KEY);

-            stopListening();
-          };
+      const audioConfig = speechSdk.AudioConfig.fromDefaultMicrophoneInput();
+      recognizer = new speechSdk.SpeechRecognizer(speechConfig, audioConfig);

-          recognizer.sessionStopped = (_s, _e) => {
-            stopListening();
-          };
+      // Set up recognizer event handlers
+      setupRecognizerHandlers(recognizer);
+
+      // Start recognition
+      setListening(true);
+      recognizer.startContinuousRecognitionAsync();
+
+      recognizer.canceled = (_s, e) => {
+        if (e.reason === speechSdk.CancellationReason.Error) {
+          console.debug(`"CANCELED: ErrorCode=${e.errorCode}`);
+          console.debug(`"CANCELED: ErrorDetails=${e.errorDetails}`);
+          console.debug(
+            'CANCELED: Did you set the speech resource key and region values?'
+          );
+          stopListening();
+          cleanup();
+        }

-          resetTranscript();
-          recognizer.startContinuousRecognitionAsync();
-        })
-        .catch(console.debug);
+        stopListening();
+      };
+
+      recognizer.sessionStopped = (_s, _e) => {
+        stopListening();
+        resetTranscript();
+      };
     } catch (error) {
-      console.debug(error);
+      console.error('Error in startListening:', error);
+      stopListening();
+      throw error;
+    }
+  };
+
+  const setupSpeechConfig = (AZURE_COGNITIVE_SERVICES_TTS_KEY: string) => {
+    speechConfig = speechSdk.SpeechConfig.fromSubscription(
+      AZURE_COGNITIVE_SERVICES_TTS_KEY,
+      'westeurope'
+    );
+    speechConfig.speechRecognitionLanguage = getCultureCodeByLanguage(userLang);
+    speechConfig.speechSynthesisLanguage = getCultureCodeByLanguage(userLang);
+    speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
+    return speechConfig;
+  };
+
+  const setupRecognizerHandlers = (recognizer: speechSdk.SpeechRecognizer) => {
+    if (recognizer) {
+      recognizer.recognized = (_, event) => {
+        // Process the recognized speech result
+        handleRecognizedSpeech(event.result.text);
+      };
+
+      // Configure speech recognition properties directly on the recognizer
+      recognizer.properties.setProperty(
+        'SpeechServiceResponse_JsonResult',
+        'true'
+      );
+
+      recognizer.properties.setProperty(
+        'SpeechServiceConnection_NoiseSuppression',
+        'true'
+      );
+
+      recognizer.properties.setProperty(
+        'SpeechServiceConnection_SNRThresholdDb',
+        '10.0'
+      );
+    }
+  };
+
+  const handleRecognizedSpeech = (text: string) => {
+    console.debug('Handling recognized speech:', text);
+
+    if (!text || text.trim().length === 0) {
+      console.debug('No valid text received from speech recognition');
+      return;
+    }
+
+    setTranscript(text);
+    setIsSpeaking(false);
+
+    const message = stripDuplicates(text);
+    console.debug('Stripped message:', message);
+    if (message.length > 0) {
+      setUserMessage(message);
+    }
+  };
+
+  // Helper function to handle transcript processing
+  const handleTranscriptProcessing = () => {
+    const message = stripDuplicates(transcript);
+    if (message.length > 0 && listening) {
+      sendMessage(message);
+      resetTranscript();
+      setUserMessage('');
+      clearListening();
+    } else if (listening) {
+      resetInteractionTimeout();
     }
   };
+
+  /**
+   * Stops the speech recognition process
+   * Closes recognizer and cleans up resources
+   */
   const stopListening = () => {
+    console.debug('Stopping speech recognition');
     if (recognizer) {
       // Stop continuous recognition and close the recognizer
       recognizer.stopContinuousRecognitionAsync();
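The listening refactor above replaces the inline timeout callback with a named `handleTranscriptProcessing` and pads the silence window by 300 ms (`continuousSpeechTimeout * 1000 + 300`). The debounce pattern in isolation, with illustrative names:

```ts
let silenceTimer: ReturnType<typeof setTimeout> | null = null;

// Call on every recognized transcript chunk; the handler only fires once
// the user has stayed silent for the whole padded window.
function armSilenceWindow(onSilence: () => void, timeoutSeconds: number): void {
  if (silenceTimer) clearTimeout(silenceTimer);
  silenceTimer = setTimeout(onSilence, timeoutSeconds * 1000 + 300);
}
```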
@@ -2320,11 +2440,18 @@ const MemoriWidget = ({
     }
     setListening(false);
   };
+
+  /**
+   * Clears all listening state and stops recognition
+   */
   const clearListening = () => {
-    setHasUserActivatedListening(false);
     stopListening();
     clearListeningTimeout();
+    setIsSpeaking(false);
   };
+  /**
+   * Resets listening state and restarts recognition if currently listening
+   */
   const resetListening = () => {
     if (listening) {
       clearListening();
@@ -2383,19 +2510,22 @@ const MemoriWidget = ({
     // eslint-disable-next-line react-hooks/exhaustive-deps
     [continuousSpeech, hasUserActivatedListening]
   );
+
   useEffect(() => {
+    // if memori is speaking, don't start listening
     if (
-      history.length > 1 &&
       !isPlayingAudio &&
       continuousSpeech &&
-      (hasUserActivatedListening || !requestedListening)
-    )
+      (hasUserActivatedListening || !requestedListening) &&
+      sessionId
+    ) {
       startListening();
-    else if (isPlayingAudio && listening) {
+    } else if (isPlayingAudio && listening) {
       stopListening();
     }
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [isPlayingAudio]);
+  }, [isPlayingAudio, hasUserActivatedListening]);
+
   useEffect(() => {
     resetListening();
     // eslint-disable-next-line react-hooks/exhaustive-deps
@@ -2617,11 +2747,6 @@ const MemoriWidget = ({
     session?: { dialogState: DialogState; sessionID: string },
     initialSessionExpired = false
   ) => {
-    // console.log('[CLICK_START] Starting onClickStart with params:', {
-    //   hasSession: !!session,
-    //   initialSessionExpired
-    // });
-
     const sessionID = session?.sessionID || sessionId;
     const dialogState = session?.dialogState || currentDialogState;
     setClickedStart(true);
@@ -2648,7 +2773,10 @@ const MemoriWidget = ({
       'birthDate',
       undefined
     );
-    let birth = birthDate || storageBirthDate || undefined;
+    let birth = birthDate || storageBirthDate || user?.birthDate;
+    if (!birth && autoStart && initialSessionID)
+      birth = '1970-01-01T10:24:03.845Z';
+
     // console.log('[CLICK_START] Using birth date:', birth);

     // Handle age verification
@@ -2926,6 +3054,8 @@ const MemoriWidget = ({
       (!!translatedMessages?.length && translatedMessages.length > 1) ||
       !initialQuestion
     ) {
+      console.log('[CLICK_START] Using existing chat history');
+
       // we have a history, don't push message
       translateDialogState(
         currentState,
@@ -2945,10 +3075,16 @@ const MemoriWidget = ({
           setHasUserActivatedSpeak(true);
         });
     } else {
+      console.log(
+        '[CLICK_START] Using existing chat history with message from initial question'
+      );
+
       // remove default initial message
       translatedMessages = [];
       setHistory([]);

+      setMemoriTyping(true);
+
       // we have no chat history, we start by initial question
       const response = await postTextEnteredEvent({
         sessionId: sessionID,
@@ -2968,6 +3104,7 @@ const MemoriWidget = ({
           }
         })
         .finally(() => {
+          setMemoriTyping(false);
           setHasUserActivatedSpeak(true);
         });
     }
@@ -3002,6 +3139,10 @@ const MemoriWidget = ({

   useEffect(() => {
     if (!clickedStart && autoStart) {
+      // Initialize TTS before starting if AZURE_COGNITIVE_SERVICES_TTS_KEY exists
+      if (AZURE_COGNITIVE_SERVICES_TTS_KEY && !speechSynthesizer) {
+        initializeTTS();
+      }
       onClickStart();
     }
   }, [clickedStart, autoStart]);
@@ -3135,6 +3276,14 @@ const MemoriWidget = ({
     setSpeakerMuted: mute => {
       speakerMuted = !!mute;
       setMuteSpeaker(mute);
+      let microphoneMode = getLocalConfig<string>(
+        'microphoneMode',
+        'HOLD_TO_TALK'
+      );
+      if (microphoneMode === 'CONTINUOUS' && mute) {
+        setContinuousSpeech(false);
+        setLocalConfig('microphoneMode', 'HOLD_TO_TALK');
+      }
       setLocalConfig('muteSpeaker', !!mute);
       if (mute) {
         stopAudio();
@@ -3491,6 +3640,7 @@ const MemoriWidget = ({
             setEnablePositionControls={setEnablePositionControls}
             isAvatar3d={!!integrationConfig?.avatarURL}
             additionalSettings={additionalSettings}
+            speakerMuted={speakerMuted}
           />
         )}