@memori.ai/memori-react 7.16.2 → 7.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +33 -0
  2. package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.d.ts +27 -0
  3. package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js +52 -0
  4. package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js.map +1 -0
  5. package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
  6. package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js +7 -7
  7. package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js.map +1 -1
  8. package/dist/components/Avatar/AvatarView/index.js +2 -3
  9. package/dist/components/Avatar/AvatarView/index.js.map +1 -1
  10. package/dist/components/ChatTextArea/ChatTextArea.css +55 -60
  11. package/dist/components/MemoriWidget/MemoriWidget.js +215 -138
  12. package/dist/components/MemoriWidget/MemoriWidget.js.map +1 -1
  13. package/dist/components/SettingsDrawer/SettingsDrawer.css +5 -0
  14. package/dist/components/SettingsDrawer/SettingsDrawer.d.ts +2 -1
  15. package/dist/components/SettingsDrawer/SettingsDrawer.js +6 -3
  16. package/dist/components/SettingsDrawer/SettingsDrawer.js.map +1 -1
  17. package/dist/components/UploadButton/UploadButton.d.ts +5 -0
  18. package/dist/components/UploadButton/UploadButton.js +49 -48
  19. package/dist/components/UploadButton/UploadButton.js.map +1 -1
  20. package/dist/components/ui/Slider.css +59 -44
  21. package/dist/context/visemeContext.d.ts +1 -1
  22. package/dist/context/visemeContext.js +2 -2
  23. package/dist/context/visemeContext.js.map +1 -1
  24. package/dist/locales/de.json +1 -0
  25. package/dist/locales/en.json +1 -0
  26. package/dist/locales/es.json +1 -0
  27. package/dist/locales/fr.json +1 -0
  28. package/dist/locales/it.json +1 -0
  29. package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.d.ts +27 -0
  30. package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js +48 -0
  31. package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js.map +1 -0
  32. package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
  33. package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js +7 -7
  34. package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js.map +1 -1
  35. package/esm/components/Avatar/AvatarView/index.js +3 -4
  36. package/esm/components/Avatar/AvatarView/index.js.map +1 -1
  37. package/esm/components/ChatTextArea/ChatTextArea.css +55 -60
  38. package/esm/components/MemoriWidget/MemoriWidget.js +216 -139
  39. package/esm/components/MemoriWidget/MemoriWidget.js.map +1 -1
  40. package/esm/components/SettingsDrawer/SettingsDrawer.css +5 -0
  41. package/esm/components/SettingsDrawer/SettingsDrawer.d.ts +2 -1
  42. package/esm/components/SettingsDrawer/SettingsDrawer.js +6 -3
  43. package/esm/components/SettingsDrawer/SettingsDrawer.js.map +1 -1
  44. package/esm/components/UploadButton/UploadButton.d.ts +5 -0
  45. package/esm/components/UploadButton/UploadButton.js +50 -49
  46. package/esm/components/UploadButton/UploadButton.js.map +1 -1
  47. package/esm/components/ui/Slider.css +59 -44
  48. package/esm/context/visemeContext.d.ts +1 -1
  49. package/esm/context/visemeContext.js +2 -2
  50. package/esm/context/visemeContext.js.map +1 -1
  51. package/esm/locales/de.json +1 -0
  52. package/esm/locales/en.json +1 -0
  53. package/esm/locales/es.json +1 -0
  54. package/esm/locales/fr.json +1 -0
  55. package/esm/locales/it.json +1 -0
  56. package/package.json +1 -2
  57. package/src/components/Avatar/AvatarView/AvatarComponent/lights/Lights.tsx +145 -0
  58. package/src/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
  59. package/src/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.tsx +6 -14
  60. package/src/components/Avatar/AvatarView/index.tsx +5 -14
  61. package/src/components/ChatTextArea/ChatTextArea.css +55 -60
  62. package/src/components/MemoriWidget/MemoriWidget.tsx +337 -187
  63. package/src/components/SettingsDrawer/SettingsDrawer.css +5 -0
  64. package/src/components/SettingsDrawer/SettingsDrawer.tsx +29 -11
  65. package/src/components/UploadButton/UploadButton.tsx +139 -118
  66. package/src/components/UploadButton/__snapshots__/UploadButton.test.tsx.snap +3 -52
  67. package/src/components/ui/Slider.css +59 -44
  68. package/src/context/visemeContext.tsx +2 -2
  69. package/src/locales/de.json +1 -0
  70. package/src/locales/en.json +1 -0
  71. package/src/locales/es.json +1 -0
  72. package/src/locales/fr.json +1 -0
  73. package/src/locales/it.json +1 -0
The hunks below are from package/src/components/MemoriWidget/MemoriWidget.tsx (+337 -187); the dist and esm bundles receive the corresponding compiled changes.

@@ -31,7 +31,11 @@ import React, {
 } from 'react';
 import { useTranslation } from 'react-i18next';
 import memoriApiClient from '@memori.ai/memori-api-client';
-import { AudioContext, IAudioContext } from 'standardized-audio-context';
+import {
+  AudioContext,
+  IAudioBufferSourceNode,
+  IAudioContext,
+} from 'standardized-audio-context';
 import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk';
 import cx from 'classnames';
 import { DateTime } from 'luxon';
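A note on the widened import: standardized-audio-context ships interface types alongside its cross-browser `AudioContext` implementation, and 7.17.0 now pulls in `IAudioBufferSourceNode` as well. A minimal sketch of how the three fit together, assuming the library's generic node interfaces (variable names are illustrative, not from the package):

```ts
import {
  AudioContext,
  IAudioBufferSourceNode,
  IAudioContext,
} from 'standardized-audio-context';

// The library's AudioContext implements the IAudioContext interface.
const ctx: IAudioContext = new AudioContext();

// createBufferSource() returns the library's buffer-source interface,
// parameterized by the context type, so a long-lived reference can be
// typed precisely instead of falling back to `any`:
let currentSource: IAudioBufferSourceNode<IAudioContext> | null = null;
currentSource = ctx.createBufferSource();
```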
@@ -596,7 +600,6 @@ const MemoriWidget = ({
   } = useViseme();

   useEffect(() => {
-    setIsPlayingAudio(!!speechSynthesizer);
     memoriSpeaking = !!speechSynthesizer;
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [speechSynthesizer]);
@@ -624,20 +627,17 @@ const MemoriWidget = ({
       defaultControlsPosition = 'bottom';
     }

-    setMuteSpeaker(
-      autoStart ||
-        getLocalConfig(
-          'muteSpeaker',
-          !defaultEnableAudio || !defaultSpeakerActive || autoStart
-        )
-    );
-    speakerMuted =
+    const muteSpeaker =
       autoStart ||
       getLocalConfig(
         'muteSpeaker',
         !defaultEnableAudio || !defaultSpeakerActive || autoStart
       );
-    setContinuousSpeech(microphoneMode === 'CONTINUOUS');
+
+    setMuteSpeaker(muteSpeaker);
+    speakerMuted = muteSpeaker;
+
+    setContinuousSpeech(muteSpeaker ? false : microphoneMode === 'CONTINUOUS');
     setContinuousSpeechTimeout(getLocalConfig('continuousSpeechTimeout', 2));
     setControlsPosition(
       getLocalConfig('controlsPosition', defaultControlsPosition)
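The hunk above folds the previously duplicated mute computation into a single `muteSpeaker` local and, new in 7.17.0, turns continuous speech off whenever the speaker ends up muted. A standalone reduction of that decision logic, assuming `getLocalConfig(key, fallback)` returns the persisted value or the fallback (a sketch, not the package's code):

```ts
function resolveMuteSpeaker(opts: {
  autoStart: boolean;
  defaultEnableAudio: boolean;
  defaultSpeakerActive: boolean;
  stored?: boolean; // persisted 'muteSpeaker' config, if any
}): boolean {
  // The fallback mutes when audio or the speaker is disabled by default,
  // or when the widget auto-starts.
  const fallback =
    !opts.defaultEnableAudio || !opts.defaultSpeakerActive || opts.autoStart;
  // autoStart always wins; otherwise the stored config takes precedence.
  return opts.autoStart || (opts.stored ?? fallback);
}

// Continuous speech is only honored while the speaker is audible:
const continuousSpeech = (muted: boolean, microphoneMode: string): boolean =>
  muted ? false : microphoneMode === 'CONTINUOUS';
```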
@@ -1971,15 +1971,24 @@ const MemoriWidget = ({
   };

   const speak = (text: string): void => {
+    console.debug('speak called with text:', text);
+
     if (!AZURE_COGNITIVE_SERVICES_TTS_KEY || preview) {
+      console.debug('No TTS key or preview mode, emitting end speak event');
       emitEndSpeakEvent();
       return;
     }
+
+    console.debug('Stopping listening before speaking');
     stopListening();

-    if (preview) return;
+    if (preview) {
+      console.debug('Preview mode, returning early');
+      return;
+    }

     if (speakerMuted) {
+      console.debug('Speaker muted, skipping speech synthesis');
       memoriSpeaking = false;
       setMemoriTyping(false);

@@ -1987,29 +1996,40 @@ const MemoriWidget = ({

       // trigger start continuous listening if set, see MemoriChat
       if (continuousSpeech) {
+        console.debug('Setting listening timeout for continuous speech');
         setListeningTimeout();
       }
       return;
     }

-    if (audioDestination) audioDestination.pause();
+    if (audioDestination) {
+      console.debug('Pausing existing audio destination');
+      audioDestination.pause();
+    }

     let isSafari =
       window.navigator.userAgent.includes('Safari') &&
       !window.navigator.userAgent.includes('Chrome');
     let isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);

+    console.debug('Browser detection - Safari:', isSafari, 'iOS:', isIOS);
+
     if ((audioContext.state as string) === 'interrupted') {
+      console.debug('Audio context interrupted, attempting resume');
       audioContext.resume().then(() => speak(text));
       return;
     }
     if (audioContext.state === 'closed') {
+      console.debug('Audio context closed, creating new context');
       audioContext = new AudioContext();
       let buffer = audioContext.createBuffer(1, 10000, 22050);
       let source = audioContext.createBufferSource();
       source.buffer = buffer;
       source.connect(audioContext.destination);
     } else if (audioContext.state === 'suspended') {
+      console.debug(
+        'Audio context suspended, stopping audio and creating new context'
+      );
       stopAudio();

       audioContext = new AudioContext();
@@ -2020,23 +2040,17 @@ const MemoriWidget = ({
     }

     if (!speechSynthesizer) {
-      if (!isIOS) {
-        audioDestination = new speechSdk.SpeakerAudioDestination();
-      }
-      let audioConfig =
-        speechSdk.AudioConfig.fromSpeakerOutput(audioDestination);
-      speechSynthesizer = new speechSdk.SpeechSynthesizer(
-        speechConfig,
-        audioConfig
-      );
+      initializeTTS();
     }

     const source = audioContext.createBufferSource();
     source.addEventListener('ended', () => {
+      console.debug('Audio source ended');
       setIsPlayingAudio(false);
       memoriSpeaking = false;
     });
     audioDestination.onAudioEnd = () => {
+      console.debug('Audio destination ended');
       setIsPlayingAudio(false);
       memoriSpeaking = false;
       source.disconnect();
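The inline synthesizer setup deleted above is replaced by a call to a new `initializeTTS()` helper that this diff does not show in full. Reconstructed from the removed lines, it plausibly looks like the following (a sketch; the actual helper elsewhere in MemoriWidget.tsx may differ, e.g. in how it detects iOS):

```ts
const initializeTTS = (): void => {
  const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);

  // On non-iOS browsers, route synthesis through a SpeakerAudioDestination
  // so playback can be paused and closed independently of the synthesizer.
  if (!isIOS) {
    audioDestination = new speechSdk.SpeakerAudioDestination();
  }
  const audioConfig = speechSdk.AudioConfig.fromSpeakerOutput(audioDestination);
  speechSynthesizer = new speechSdk.SpeechSynthesizer(speechConfig, audioConfig);
};
```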
@@ -2048,100 +2062,134 @@ const MemoriWidget = ({
     };

     // Clear any existing visemes before starting new speech
+    console.debug('Resetting viseme queue');
     resetVisemeQueue();

     // Set up the viseme event handler
-    speechSynthesizer.visemeReceived = function (_, e) {
-      addViseme(e.visemeId, e.audioOffset);
-    };
+    if (speechSynthesizer) {
+      speechSynthesizer.visemeReceived = function (_, e) {
+        console.debug(
+          'Viseme received:',
+          e.visemeId,
+          'at offset:',
+          e.audioOffset
+        );
+        addViseme(e.visemeId, e.audioOffset);
+      };
+    }

     // Set up viseme handling
     const textToSpeak = escapeHTML(
       stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text))))
     );
+    console.debug('Processed text to speak:', textToSpeak);

-    speechSynthesizer.speakSsmlAsync(
-      `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" xml:lang="${getCultureCodeByLanguage(
-        userLang
-      )}"><voice name="${getTTSVoice(userLang)}"><s>${replaceTextWithPhonemes(
-        textToSpeak,
-        userLang.toLowerCase()
-      )}</s></voice></speak>`,
-      result => {
-        if (result) {
-          setIsPlayingAudio(true);
-          memoriSpeaking = true;
-
-          // Process the viseme data
-          startProcessing(audioContext);
+    setTimeout(() => {
+      if (speechSynthesizer) {
+        console.debug('Starting speech synthesis');
+        speechSynthesizer.speakSsmlAsync(
+          `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" xml:lang="${getCultureCodeByLanguage(
+            userLang
+          )}"><voice name="${getTTSVoice(
+            userLang
+          )}"><s>${replaceTextWithPhonemes(
+            textToSpeak,
+            userLang.toLowerCase()
+          )}</s></voice></speak>`,
+          result => {
+            if (result) {
+              console.debug('Speech synthesis successful');
+              setIsPlayingAudio(true);
+              memoriSpeaking = true;
+
+              // Process the viseme data
+              startProcessing(audioContext);

-          try {
-            // Decode the audio data
-            audioContext.decodeAudioData(result.audioData, function (buffer) {
-              source.buffer = buffer;
-              source.connect(audioContext.destination);
+              try {
+                // Decode the audio data
+                console.debug('Decoding audio data');
+                audioContext.decodeAudioData(
+                  result.audioData,
+                  function (buffer) {
+                    console.debug('Audio data decoded successfully');
+                    source.buffer = buffer;
+                    source.connect(audioContext.destination);
+
+                    if (history.length < 1 || (isSafari && isIOS)) {
+                      console.debug('Starting audio playback');
+                      source.start(0);
+                    }
+                  }
+                );

-              if (history.length < 1 || (isSafari && isIOS)) {
-                source.start(0);
-              }
-            });
+                // Handle the audio context state changes
+                audioContext.onstatechange = () => {
+                  console.debug(
+                    'Audio context state changed to:',
+                    audioContext.state
+                  );
+                  if (
+                    audioContext.state === 'suspended' ||
+                    audioContext.state === 'closed'
+                  ) {
+                    source.disconnect();
+                    setIsPlayingAudio(false);
+                    stopProcessing();
+                    resetVisemeQueue();
+                    memoriSpeaking = false;
+                  } else if ((audioContext.state as string) === 'interrupted') {
+                    audioContext.resume();
+                  }
+                };

-            // Handle the audio context state changes
-            audioContext.onstatechange = () => {
-              if (
-                audioContext.state === 'suspended' ||
-                audioContext.state === 'closed'
-              ) {
-                source.disconnect();
-                setIsPlayingAudio(false);
-                stopProcessing();
-                resetVisemeQueue();
-                memoriSpeaking = false;
-              } else if ((audioContext.state as string) === 'interrupted') {
                 audioContext.resume();
-              }
-            };
-
-            audioContext.resume();

-            if (speechSynthesizer) {
-              speechSynthesizer.close();
-              speechSynthesizer = null;
+                if (speechSynthesizer) {
+                  console.debug('Closing speech synthesizer');
+                  speechSynthesizer.close();
+                  speechSynthesizer = null;
+                }
+              } catch (error) {
+                console.error('Error processing audio data:', error);
+                handleFallback(text);
+              }
+            } else {
+              console.debug('No result from speech synthesis, using fallback');
+              handleFallback(text);
             }
-          } catch (error) {
-            console.error('Error processing audio data:', error);
+          },
+          error => {
+            console.error('Speak error:', error);
             handleFallback(text);
           }
-        } else {
-          handleFallback(text);
-        }
-      },
-      error => {
-        console.error('Speak error:', error);
-        handleFallback(text);
+        );
       }
-    );
-
+    }, 100);
     setMemoriTyping(false);
   };

   // Helper function for fallback behavior
   const handleFallback = (text: string) => {
-    console.log('Falling back to browser speech synthesis');
     window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
     cleanup();
   };

-  // Modify cleanup to include speech state reset
-  const cleanup = () => {
+  const cleanup = (): void => {
     setIsPlayingAudio(false);
     stopProcessing();
     resetVisemeQueue();
     memoriSpeaking = false;

-    if (speechSynthesizer) {
-      console.log('Closing speech synthesizer');
-      speechSynthesizer.close();
+    try {
+      if (speechSynthesizer) {
+        const currentSynthesizer = speechSynthesizer;
+        speechSynthesizer = null; // Clear reference first
+        console.debug('Closing speech synthesizer');
+        currentSynthesizer.close();
+      }
+    } catch (error) {
+      console.debug('Error during synthesizer cleanup:', error);
+      // Even if close fails, ensure synthesizer is nullified
       speechSynthesizer = null;
     }

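The reworked `cleanup` nulls the shared `speechSynthesizer` reference before calling `close()`, so re-entrant callers see the synthesizer as already gone even if `close()` throws. The same idiom in isolation (illustrative names only):

```ts
let resource: { close(): void } | null = { close() {} };

function release(): void {
  const current = resource;
  resource = null; // clear the shared reference first
  try {
    current?.close();
  } catch (e) {
    // a failing close() must not resurrect the reference
    console.debug('close failed:', e);
  }
}
```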
@@ -2149,18 +2197,25 @@ const MemoriWidget = ({
   };

   // Modify stopAudio to include speech state reset
-  const stopAudio = () => {
+  const stopAudio = async (): Promise<void> => {
     setIsPlayingAudio(false);
     memoriSpeaking = false;

     try {
       if (speechSynthesizer) {
-        speechSynthesizer.close();
+        const currentSynthesizer = speechSynthesizer;
         speechSynthesizer = null;
+        try {
+          currentSynthesizer.close();
+        } catch (e) {
+          console.debug('Error closing speech synthesizer:', e);
+        }
       }
-      if (audioContext.state !== 'closed') {
+
+      if (audioContext?.state !== 'closed') {
         audioContext.close();
       }
+
       if (audioDestination) {
         audioDestination.pause();
         audioDestination.close();
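`stopAudio` now guards the context with optional chaining before closing it. For reference, the underlying Web Audio idiom as a hypothetical helper (not in the package): calling `close()` on an already-closed `AudioContext` rejects with an `InvalidStateError`, so the state is checked first.

```ts
async function closeAudioContextSafely(ctx?: AudioContext): Promise<void> {
  if (ctx && ctx.state !== 'closed') {
    await ctx.close(); // resolves once the context has released its resources
  }
}
```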
@@ -2169,7 +2224,6 @@ const MemoriWidget = ({
       console.debug('stopAudio error: ', e);
     }
   };
-
   const focusChatInput = () => {
     let textarea = document.querySelector(
       '#chat-fieldset textarea'
@@ -2192,126 +2246,192 @@ const MemoriWidget = ({
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [currentDialogState?.emission]);

-  /**
-   * Speech recognition and transcript management
-   */
   const [transcript, setTranscript] = useState('');
-  const resetTranscript = () => setTranscript('');
-
-  /**
-   * Listening transcript timeout
-   */
   const [transcriptTimeout, setTranscriptTimeout] =
     useState<NodeJS.Timeout | null>(null);
+  const [isSpeaking, setIsSpeaking] = useState(false);
+  // const [isProcessingSTT, setIsProcessingSTT] = useState(false);
+
+  const resetTranscript = () => {
+    setTranscript('');
+    // setIsProcessingSTT(false);
+  };
+
   const setListeningTimeout = () => {
-    let timeout = setTimeout(async () => {
-      clearListening();
-      const message = stripDuplicates(transcript);
-      if (message.length > 0 && listening) {
-        sendMessage(message);
-        resetTranscript();
-        setUserMessage('');
-      } else if (listening) {
-        resetInteractionTimeout();
-      }
-    }, continuousSpeechTimeout * 1000);
-    setTranscriptTimeout(timeout);
+    clearListeningTimeout();
+    const timeout = setTimeout(
+      handleTranscriptProcessing,
+      continuousSpeechTimeout * 1000 + 300
+    );
+    setTranscriptTimeout(timeout as unknown as NodeJS.Timeout);
   };
+
   const clearListeningTimeout = () => {
     if (transcriptTimeout) {
       clearTimeout(transcriptTimeout);
       setTranscriptTimeout(null);
     }
   };
+
   const resetListeningTimeout = () => {
     clearListeningTimeout();
-    if (continuousSpeech) setListeningTimeout();
+    if (continuousSpeech) {
+      setListeningTimeout();
+    }
   };
+  // Modified useEffect to handle transcript changes
   useEffect(() => {
-    resetListeningTimeout();
-    resetInteractionTimeout();
+    if (!isSpeaking) {
+      resetListeningTimeout();
+      resetInteractionTimeout();
+    }
+  }, [transcript, isSpeaking]);

-    // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [transcript]);
+  // Clean up function for component unmount
+  useEffect(() => {
+    return () => {
+      clearListeningTimeout();
+    };
+  }, []);

   /**
    * Listening methods
    */
-  const startListening = async () => {
-    if (!AZURE_COGNITIVE_SERVICES_TTS_KEY) return;
+  /**
+   * Starts speech recognition using Azure Cognitive Services
+   * Sets up recognizer and begins continuous recognition
+   */
+  const startListening = async (): Promise<void> => {
+    if (!AZURE_COGNITIVE_SERVICES_TTS_KEY) {
+      throw new Error('No TTS key available');
+    }

-    clearListening();
-    setTranscript('');
-    resetTranscript();
+    if (!sessionId) {
+      throw new Error('No session ID available');
+    }

-    // remove focus on chat input if the user is on mobile
-    if (hasTouchscreen()) setEnableFocusChatInput(false);
+    // Ensure complete cleanup before starting, if it's already listening, stop it
+    cleanup();
+    resetTranscript();

     try {
-      navigator.mediaDevices
-        .getUserMedia({ audio: true })
-        .then(function (_stream) {
-          setHasUserActivatedListening(true);
-
-          if (!speechConfig) {
-            speechConfig = speechSdk.SpeechConfig.fromSubscription(
-              AZURE_COGNITIVE_SERVICES_TTS_KEY,
-              'westeurope'
-            );
-            speechConfig.speechRecognitionLanguage =
-              getCultureCodeByLanguage(userLang);
-            speechConfig.speechSynthesisLanguage =
-              getCultureCodeByLanguage(userLang);
-            speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
-          }
+      // Add delay to ensure previous instance is fully cleaned up
+      // await new Promise(resolve => setTimeout(resolve, 300));

-          const audioConfig =
-            speechSdk.AudioConfig.fromDefaultMicrophoneInput();
-          recognizer = new speechSdk.SpeechRecognizer(
-            speechConfig,
-            audioConfig
-          );
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      setHasUserActivatedListening(true);

-          setListening(true);
-          recognizer.recognized = (_s, e) => {
-            if (!e.result.text) return;
-            if (e.result.reason === speechSdk.ResultReason.RecognizedSpeech) {
-              let transcript = e.result.text;
-              setTranscript(transcript || '');
-              if (transcript?.length > 0) {
-                const transcriptMessage = stripDuplicates(transcript);
-                if (transcriptMessage.length > 0)
-                  setUserMessage(msg => `${msg} ${transcriptMessage}`);
-              }
-            } else if (e.result.reason === speechSdk.ResultReason.NoMatch) {
-              console.debug('NOMATCH: Speech could not be recognized.');
-            }
-          };
-          recognizer.canceled = (_s, e) => {
-            if (e.reason === speechSdk.CancellationReason.Error) {
-              console.debug(`"CANCELED: ErrorCode=${e.errorCode}`);
-              console.debug(`"CANCELED: ErrorDetails=${e.errorDetails}`);
-              console.debug(
-                'CANCELED: Did you set the speech resource key and region values?'
-              );
-            }
+      // Recreate speech config each time
+      speechConfig = setupSpeechConfig(AZURE_COGNITIVE_SERVICES_TTS_KEY);

-            stopListening();
-          };
+      const audioConfig = speechSdk.AudioConfig.fromDefaultMicrophoneInput();
+      recognizer = new speechSdk.SpeechRecognizer(speechConfig, audioConfig);

-          recognizer.sessionStopped = (_s, _e) => {
-            stopListening();
-          };
+      // Set up recognizer event handlers
+      setupRecognizerHandlers(recognizer);
+
+      // Start recognition
+      setListening(true);
+      recognizer.startContinuousRecognitionAsync();
+
+      recognizer.canceled = (_s, e) => {
+        if (e.reason === speechSdk.CancellationReason.Error) {
+          console.debug(`"CANCELED: ErrorCode=${e.errorCode}`);
+          console.debug(`"CANCELED: ErrorDetails=${e.errorDetails}`);
+          console.debug(
+            'CANCELED: Did you set the speech resource key and region values?'
+          );
+          stopListening();
+          cleanup();
+        }

-          resetTranscript();
-          recognizer.startContinuousRecognitionAsync();
-        })
-        .catch(console.debug);
+        stopListening();
+      };
+
+      recognizer.sessionStopped = (_s, _e) => {
+        stopListening();
+        resetTranscript();
+      };
     } catch (error) {
-      console.debug(error);
+      console.error('Error in startListening:', error);
+      stopListening();
+      throw error;
+    }
+  };
+
+  const setupSpeechConfig = (AZURE_COGNITIVE_SERVICES_TTS_KEY: string) => {
+    speechConfig = speechSdk.SpeechConfig.fromSubscription(
+      AZURE_COGNITIVE_SERVICES_TTS_KEY,
+      'westeurope'
+    );
+    speechConfig.speechRecognitionLanguage = getCultureCodeByLanguage(userLang);
+    speechConfig.speechSynthesisLanguage = getCultureCodeByLanguage(userLang);
+    speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
+    return speechConfig;
+  };
+
+  const setupRecognizerHandlers = (recognizer: speechSdk.SpeechRecognizer) => {
+    if (recognizer) {
+      recognizer.recognized = (_, event) => {
+        // Process the recognized speech result
+        handleRecognizedSpeech(event.result.text);
+      };
+
+      // Configure speech recognition properties directly on the recognizer
+      recognizer.properties.setProperty(
+        'SpeechServiceResponse_JsonResult',
+        'true'
+      );
+
+      recognizer.properties.setProperty(
+        'SpeechServiceConnection_NoiseSuppression',
+        'true'
+      );
+
+      recognizer.properties.setProperty(
+        'SpeechServiceConnection_SNRThresholdDb',
+        '10.0'
+      );
+    }
+  };
+
+  const handleRecognizedSpeech = (text: string) => {
+    console.debug('Handling recognized speech:', text);
+
+    if (!text || text.trim().length === 0) {
+      console.debug('No valid text received from speech recognition');
+      return;
+    }
+
+    setTranscript(text);
+    setIsSpeaking(false);
+
+    const message = stripDuplicates(text);
+    console.debug('Stripped message:', message);
+    if (message.length > 0) {
+      setUserMessage(message);
+    }
+  };
+
+  // Helper function to handle transcript processing
+  const handleTranscriptProcessing = () => {
+    const message = stripDuplicates(transcript);
+    if (message.length > 0 && listening) {
+      sendMessage(message);
+      resetTranscript();
+      setUserMessage('');
+      clearListening();
+    } else if (listening) {
+      resetInteractionTimeout();
     }
   };
+
+  /**
+   * Stops the speech recognition process
+   * Closes recognizer and cleans up resources
+   */
   const stopListening = () => {
+    console.debug('Stopping speech recognition');
     if (recognizer) {
       // Stop continuous recognition and close the recognizer
       recognizer.stopContinuousRecognitionAsync();
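The listening refactor above replaces the inline timeout callback with a named `handleTranscriptProcessing` and pads the silence window by 300 ms (`continuousSpeechTimeout * 1000 + 300`). The debounce pattern in isolation, with illustrative names:

```ts
let silenceTimer: ReturnType<typeof setTimeout> | null = null;

// Call on every recognized transcript chunk; the handler only fires once
// the user has stayed silent for the whole padded window.
function armSilenceWindow(onSilence: () => void, timeoutSeconds: number): void {
  if (silenceTimer) clearTimeout(silenceTimer);
  silenceTimer = setTimeout(onSilence, timeoutSeconds * 1000 + 300);
}
```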
@@ -2320,11 +2440,18 @@ const MemoriWidget = ({
     }
     setListening(false);
   };
+
+  /**
+   * Clears all listening state and stops recognition
+   */
   const clearListening = () => {
-    setHasUserActivatedListening(false);
     stopListening();
     clearListeningTimeout();
+    setIsSpeaking(false);
   };
+  /**
+   * Resets listening state and restarts recognition if currently listening
+   */
   const resetListening = () => {
     if (listening) {
       clearListening();
@@ -2383,19 +2510,22 @@ const MemoriWidget = ({
     // eslint-disable-next-line react-hooks/exhaustive-deps
     [continuousSpeech, hasUserActivatedListening]
   );
+
   useEffect(() => {
+    // if memori is speaking, don't start listening
     if (
-      history.length > 1 &&
       !isPlayingAudio &&
       continuousSpeech &&
-      (hasUserActivatedListening || !requestedListening)
-    )
+      (hasUserActivatedListening || !requestedListening) &&
+      sessionId
+    ) {
       startListening();
-    else if (isPlayingAudio && listening) {
+    } else if (isPlayingAudio && listening) {
       stopListening();
     }
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [isPlayingAudio]);
+  }, [isPlayingAudio, hasUserActivatedListening]);
+
   useEffect(() => {
     resetListening();
     // eslint-disable-next-line react-hooks/exhaustive-deps
@@ -2617,11 +2747,6 @@ const MemoriWidget = ({
     session?: { dialogState: DialogState; sessionID: string },
     initialSessionExpired = false
   ) => {
-    // console.log('[CLICK_START] Starting onClickStart with params:', {
-    //   hasSession: !!session,
-    //   initialSessionExpired
-    // });
-
     const sessionID = session?.sessionID || sessionId;
     const dialogState = session?.dialogState || currentDialogState;
     setClickedStart(true);
@@ -2648,7 +2773,10 @@ const MemoriWidget = ({
       'birthDate',
       undefined
     );
-    let birth = birthDate || storageBirthDate || undefined;
+    let birth = birthDate || storageBirthDate || user?.birthDate;
+    if (!birth && autoStart && initialSessionID)
+      birth = '1970-01-01T10:24:03.845Z';
+
     // console.log('[CLICK_START] Using birth date:', birth);

     // Handle age verification
@@ -2926,6 +3054,8 @@ const MemoriWidget = ({
       (!!translatedMessages?.length && translatedMessages.length > 1) ||
       !initialQuestion
     ) {
+      console.log('[CLICK_START] Using existing chat history');
+
       // we have a history, don't push message
       translateDialogState(
         currentState,
@@ -2945,10 +3075,16 @@ const MemoriWidget = ({
           setHasUserActivatedSpeak(true);
         });
     } else {
+      console.log(
+        '[CLICK_START] Using existing chat history with message from initial question'
+      );
+
       // remove default initial message
       translatedMessages = [];
       setHistory([]);

+      setMemoriTyping(true);
+
       // we have no chat history, we start by initial question
       const response = await postTextEnteredEvent({
         sessionId: sessionID,
@@ -2968,6 +3104,7 @@ const MemoriWidget = ({
           }
         })
         .finally(() => {
+          setMemoriTyping(false);
           setHasUserActivatedSpeak(true);
         });
     }
@@ -3002,6 +3139,10 @@ const MemoriWidget = ({

   useEffect(() => {
     if (!clickedStart && autoStart) {
+      // Initialize TTS before starting if AZURE_COGNITIVE_SERVICES_TTS_KEY exists
+      if (AZURE_COGNITIVE_SERVICES_TTS_KEY && !speechSynthesizer) {
+        initializeTTS();
+      }
       onClickStart();
     }
   }, [clickedStart, autoStart]);
@@ -3135,6 +3276,14 @@ const MemoriWidget = ({
     setSpeakerMuted: mute => {
       speakerMuted = !!mute;
       setMuteSpeaker(mute);
+      let microphoneMode = getLocalConfig<string>(
+        'microphoneMode',
+        'HOLD_TO_TALK'
+      );
+      if (microphoneMode === 'CONTINUOUS' && mute) {
+        setContinuousSpeech(false);
+        setLocalConfig('microphoneMode', 'HOLD_TO_TALK');
+      }
       setLocalConfig('muteSpeaker', !!mute);
       if (mute) {
         stopAudio();
@@ -3491,6 +3640,7 @@ const MemoriWidget = ({
             setEnablePositionControls={setEnablePositionControls}
             isAvatar3d={!!integrationConfig?.avatarURL}
             additionalSettings={additionalSettings}
+            speakerMuted={speakerMuted}
           />
         )}