@memori.ai/memori-react 8.0.2 → 8.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CHANGELOG.md +41 -0
  2. package/dist/components/Chat/Chat.d.ts +0 -2
  3. package/dist/components/Chat/Chat.js +2 -2
  4. package/dist/components/Chat/Chat.js.map +1 -1
  5. package/dist/components/ChatHistoryDrawer/ChatHistory.css +44 -2
  6. package/dist/components/ChatHistoryDrawer/ChatHistory.d.ts +4 -1
  7. package/dist/components/ChatHistoryDrawer/ChatHistory.js +150 -36
  8. package/dist/components/ChatHistoryDrawer/ChatHistory.js.map +1 -1
  9. package/dist/components/ChatInputs/ChatInputs.d.ts +0 -2
  10. package/dist/components/ChatInputs/ChatInputs.js +3 -4
  11. package/dist/components/ChatInputs/ChatInputs.js.map +1 -1
  12. package/dist/components/MemoriWidget/MemoriWidget.js +114 -339
  13. package/dist/components/MemoriWidget/MemoriWidget.js.map +1 -1
  14. package/dist/helpers/stt/useSTT.d.ts +40 -0
  15. package/dist/helpers/stt/useSTT.js +362 -0
  16. package/dist/helpers/stt/useSTT.js.map +1 -0
  17. package/dist/locales/de.json +12 -0
  18. package/dist/locales/en.json +11 -0
  19. package/dist/locales/es.json +12 -0
  20. package/dist/locales/fr.json +12 -0
  21. package/dist/locales/it.json +11 -0
  22. package/esm/components/Chat/Chat.d.ts +0 -2
  23. package/esm/components/Chat/Chat.js +2 -2
  24. package/esm/components/Chat/Chat.js.map +1 -1
  25. package/esm/components/ChatHistoryDrawer/ChatHistory.css +44 -2
  26. package/esm/components/ChatHistoryDrawer/ChatHistory.d.ts +4 -1
  27. package/esm/components/ChatHistoryDrawer/ChatHistory.js +150 -36
  28. package/esm/components/ChatHistoryDrawer/ChatHistory.js.map +1 -1
  29. package/esm/components/ChatInputs/ChatInputs.d.ts +0 -2
  30. package/esm/components/ChatInputs/ChatInputs.js +3 -4
  31. package/esm/components/ChatInputs/ChatInputs.js.map +1 -1
  32. package/esm/components/MemoriWidget/MemoriWidget.js +114 -339
  33. package/esm/components/MemoriWidget/MemoriWidget.js.map +1 -1
  34. package/esm/helpers/stt/useSTT.d.ts +40 -0
  35. package/esm/helpers/stt/useSTT.js +358 -0
  36. package/esm/helpers/stt/useSTT.js.map +1 -0
  37. package/esm/locales/de.json +12 -0
  38. package/esm/locales/en.json +11 -0
  39. package/esm/locales/es.json +12 -0
  40. package/esm/locales/fr.json +12 -0
  41. package/esm/locales/it.json +11 -0
  42. package/package.json +2 -3
  43. package/src/components/Chat/Chat.test.tsx +0 -9
  44. package/src/components/Chat/Chat.tsx +0 -6
  45. package/src/components/ChatHistoryDrawer/ChatHistory.css +44 -2
  46. package/src/components/ChatHistoryDrawer/ChatHistory.stories.tsx +40 -17
  47. package/src/components/ChatHistoryDrawer/ChatHistory.test.tsx +8 -0
  48. package/src/components/ChatHistoryDrawer/ChatHistory.tsx +194 -59
  49. package/src/components/ChatInputs/ChatInputs.test.tsx +0 -6
  50. package/src/components/ChatInputs/ChatInputs.tsx +2 -7
  51. package/src/components/MemoriWidget/MemoriWidget.tsx +169 -502
  52. package/src/helpers/stt/useSTT.ts +551 -0
  53. package/src/locales/de.json +12 -0
  54. package/src/locales/en.json +11 -0
  55. package/src/locales/es.json +12 -0
  56. package/src/locales/fr.json +12 -0
  57. package/src/locales/it.json +11 -0
@@ -16,12 +16,6 @@ import {
16
16
  ResponseSpec,
17
17
  ChatLog,
18
18
  } from '@memori.ai/memori-api-client/src/types';
19
- import {
20
- SpeakerAudioDestination,
21
- SpeechConfig,
22
- SpeechSynthesizer,
23
- SpeechRecognizer,
24
- } from 'microsoft-cognitiveservices-speech-sdk';
25
19
 
26
20
  // Libraries
27
21
  import React, {
@@ -35,7 +29,6 @@ import React, {
35
29
  import { useTranslation } from 'react-i18next';
36
30
  import memoriApiClient from '@memori.ai/memori-api-client';
37
31
  import { AudioContext, IAudioContext } from 'standardized-audio-context';
38
- import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk';
39
32
  import cx from 'classnames';
40
33
  import { DateTime } from 'luxon';
41
34
  import toast from 'react-hot-toast';
@@ -90,6 +83,7 @@ import { sanitizeText } from '../../helpers/sanitizer';
90
83
  import { TTSConfig, useTTS } from '../../helpers/tts/useTTS';
91
84
  import Alert from '../ui/Alert';
92
85
  import ChatHistoryDrawer from '../ChatHistoryDrawer/ChatHistory';
86
+ import { STTConfig, useSTT } from '../../helpers/stt/useSTT';
93
87
 
94
88
  // Widget utilities and helpers
95
89
  const getMemoriState = (integrationId?: string): object | null => {
@@ -327,10 +321,6 @@ window.typeMessage = typeMessage;
327
321
  window.typeMessageHidden = typeMessageHidden;
328
322
  window.typeBatchMessages = typeBatchMessages;
329
323
 
330
- // Global variables
331
- let recognizer: SpeechRecognizer | null;
332
- let speechConfig: SpeechConfig;
333
- let audioDestination: SpeakerAudioDestination;
334
324
  let audioContext: IAudioContext;
335
325
 
336
326
  let memoriPassword: string | undefined;
@@ -606,7 +596,7 @@ const MemoriWidget = ({
606
596
  );
607
597
  const [hideEmissions, setHideEmissions] = useState(false);
608
598
 
609
- const speechSynthesizerRef = useRef<SpeechSynthesizer | null>(null);
599
+ const speechSynthesizerRef = useRef<any | null>(null);
610
600
  const [memoriSpeaking, setMemoriSpeaking] = useState(false);
611
601
 
612
602
  useEffect(() => {
@@ -659,7 +649,8 @@ const MemoriWidget = ({
659
649
 
660
650
  // Effect to handle enableAudio changes
661
651
  useEffect(() => {
662
- const isAudioEnabled = enableAudio ?? integrationConfig?.enableAudio ?? true;
652
+ const isAudioEnabled =
653
+ enableAudio ?? integrationConfig?.enableAudio ?? true;
663
654
  if (!isAudioEnabled) {
664
655
  // Force mute when audio is disabled
665
656
  setLocalConfig('muteSpeaker', true);
@@ -832,7 +823,9 @@ const MemoriWidget = ({
832
823
  m => !m.mediumID && m.properties?.isAttachedFile
833
824
  );
834
825
  if (mediaDocuments && mediaDocuments.length > 0) {
835
- const documentContents = mediaDocuments.map(doc => doc.content).join(' ');
826
+ const documentContents = mediaDocuments
827
+ .map(doc => doc.content)
828
+ .join(' ');
836
829
  msg = msg + ' ' + documentContents;
837
830
  }
838
831
 
@@ -1088,6 +1081,64 @@ const MemoriWidget = ({
1088
1081
  const [birthDate, setBirthDate] = useState<string | undefined>();
1089
1082
  const [showAgeVerification, setShowAgeVerification] = useState(false);
1090
1083
 
1084
+ const getCultureCodeByLanguage = (lang?: string): string => {
1085
+ let voice = '';
1086
+ let voiceLang = (
1087
+ lang ||
1088
+ memori.culture?.split('-')?.[0] ||
1089
+ i18n.language ||
1090
+ 'IT'
1091
+ ).toUpperCase();
1092
+ switch (voiceLang) {
1093
+ case 'IT':
1094
+ voice = 'it-IT';
1095
+ break;
1096
+ case 'DE':
1097
+ voice = 'de-DE';
1098
+ break;
1099
+ case 'EN':
1100
+ voice = 'en-GB';
1101
+ break;
1102
+ case 'ES':
1103
+ voice = 'es-ES';
1104
+ break;
1105
+ case 'FR':
1106
+ voice = 'fr-FR';
1107
+ break;
1108
+ case 'PT':
1109
+ voice = 'pt-PT';
1110
+ break;
1111
+ case 'UK':
1112
+ voice = 'uk-UK';
1113
+ break;
1114
+ case 'RU':
1115
+ voice = 'ru-RU';
1116
+ break;
1117
+ case 'PL':
1118
+ voice = 'pl-PL';
1119
+ break;
1120
+ case 'FI':
1121
+ voice = 'fi-FI';
1122
+ break;
1123
+ case 'EL':
1124
+ voice = 'el-GR';
1125
+ break;
1126
+ case 'AR':
1127
+ voice = 'ar-SA';
1128
+ break;
1129
+ case 'ZH':
1130
+ voice = 'zh-CN';
1131
+ break;
1132
+ case 'JA':
1133
+ voice = 'ja-JP';
1134
+ break;
1135
+ default:
1136
+ voice = 'it-IT';
1137
+ break;
1138
+ }
1139
+ return voice;
1140
+ };
1141
+
1091
1142
  /**
1092
1143
  * Sessione
1093
1144
  */
@@ -1803,18 +1854,27 @@ const MemoriWidget = ({
1803
1854
  () => ({
1804
1855
  provider: ttsProvider,
1805
1856
  voice: getTTSVoice(
1806
- userLang || memori.culture?.split('-')?.[0] || 'EN',
1807
- ttsProvider,
1857
+ userLang || memori.culture?.split('-')?.[0] || 'EN',
1858
+ ttsProvider,
1808
1859
  memori.voiceType as 'MALE' | 'FEMALE' | 'NEUTRAL'
1809
1860
  ),
1810
1861
  tenant: tenantID,
1811
1862
  region: 'westeurope',
1812
1863
  voiceType: memori.voiceType,
1813
- layout: selectedLayout
1864
+ layout: selectedLayout,
1814
1865
  }),
1815
1866
  [ttsProvider, userLang, memori.culture, memori.voiceType]
1816
1867
  );
1817
1868
 
1869
+ const sttConfig = useMemo(
1870
+ () => ({
1871
+ provider: ttsProvider,
1872
+ language: getCultureCodeByLanguage(userLang),
1873
+ tenant: tenantID,
1874
+ }),
1875
+ [ttsProvider, userLang]
1876
+ );
1877
+
1818
1878
  // Initialize TTS hook with basic options first
1819
1879
  const {
1820
1880
  speak: ttsSpeak,
@@ -1826,15 +1886,61 @@ const MemoriWidget = ({
1826
1886
  setHasUserActivatedSpeak,
1827
1887
  error,
1828
1888
  setError,
1829
- } = useTTS(ttsConfig as TTSConfig, {
1830
- apiUrl: `${baseUrl}/api/tts`,
1831
- continuousSpeech: continuousSpeech,
1832
- onEndSpeakStartListen: () => {
1833
- // Placeholder - will be implemented after startListening is defined
1834
- console.log('[MemoriWidget] onEndSpeakStartListen called');
1889
+ } = useTTS(
1890
+ ttsConfig as TTSConfig,
1891
+ {
1892
+ apiUrl: `${baseUrl}/api/tts`,
1893
+ continuousSpeech: continuousSpeech,
1894
+ preview: preview,
1835
1895
  },
1836
- preview: preview,
1837
- }, autoStart, defaultEnableAudio, defaultSpeakerActive);
1896
+ autoStart,
1897
+ defaultEnableAudio,
1898
+ defaultSpeakerActive
1899
+ );
1900
+
1901
+ // Create a single, centralized function to process and send messages
1902
+ const processSpeechAndSendMessage = (text: string) => {
1903
+
1904
+ console.log('processSpeechAndSendMessage', text);
1905
+ // Skip if already processing or no text
1906
+ if (!text || text.trim().length === 0) {
1907
+ return;
1908
+ }
1909
+
1910
+ try {
1911
+ // Process the text
1912
+ const message = stripDuplicates(text);
1913
+ console.debug('Processing speech message:', message);
1914
+
1915
+ if (message.length > 0) {
1916
+ setUserMessage('');
1917
+
1918
+ // Send the message
1919
+ console.debug('Sending message:', message);
1920
+ sendMessage(message);
1921
+ }
1922
+ } catch (error) {
1923
+ console.error('Error in processSpeechAndSendMessage:', error);
1924
+ }
1925
+ };
1926
+
1927
+ const {
1928
+ isListening,
1929
+
1930
+ // Actions
1931
+ startRecording,
1932
+ stopRecording,
1933
+ } = useSTT(
1934
+ sttConfig as STTConfig,
1935
+ processSpeechAndSendMessage,
1936
+ {
1937
+ apiUrl: `${baseUrl}/api/stt`,
1938
+ continuousRecording: continuousSpeech,
1939
+ silenceTimeout: continuousSpeechTimeout,
1940
+ autoStart: autoStart,
1941
+ },
1942
+ defaultEnableAudio
1943
+ );
1838
1944
 
1839
1945
  const resetInteractionTimeout = () => {
1840
1946
  clearInteractionTimeout();
@@ -1948,64 +2054,6 @@ const MemoriWidget = ({
1948
2054
  hasUserActivatedSpeak,
1949
2055
  ]);
1950
2056
 
1951
- const getCultureCodeByLanguage = (lang?: string): string => {
1952
- let voice = '';
1953
- let voiceLang = (
1954
- lang ||
1955
- memori.culture?.split('-')?.[0] ||
1956
- i18n.language ||
1957
- 'IT'
1958
- ).toUpperCase();
1959
- switch (voiceLang) {
1960
- case 'IT':
1961
- voice = 'it-IT';
1962
- break;
1963
- case 'DE':
1964
- voice = 'de-DE';
1965
- break;
1966
- case 'EN':
1967
- voice = 'en-GB';
1968
- break;
1969
- case 'ES':
1970
- voice = 'es-ES';
1971
- break;
1972
- case 'FR':
1973
- voice = 'fr-FR';
1974
- break;
1975
- case 'PT':
1976
- voice = 'pt-PT';
1977
- break;
1978
- case 'UK':
1979
- voice = 'uk-UK';
1980
- break;
1981
- case 'RU':
1982
- voice = 'ru-RU';
1983
- break;
1984
- case 'PL':
1985
- voice = 'pl-PL';
1986
- break;
1987
- case 'FI':
1988
- voice = 'fi-FI';
1989
- break;
1990
- case 'EL':
1991
- voice = 'el-GR';
1992
- break;
1993
- case 'AR':
1994
- voice = 'ar-SA';
1995
- break;
1996
- case 'ZH':
1997
- voice = 'zh-CN';
1998
- break;
1999
- case 'JA':
2000
- voice = 'ja-JP';
2001
- break;
2002
- default:
2003
- voice = 'it-IT';
2004
- break;
2005
- }
2006
- return voice;
2007
- };
2008
-
2009
2057
  /**
2010
2058
  * Enhanced handleSpeak that integrates with the improved useTTS hook
2011
2059
  * Uses promise-based approach for better reliability
@@ -2015,14 +2063,11 @@ const MemoriWidget = ({
2015
2063
  const e = new CustomEvent('MemoriEndSpeak');
2016
2064
  document.dispatchEvent(e);
2017
2065
 
2018
- if (continuousSpeech) {
2019
- setListeningTimeout();
2020
- }
2021
2066
  return Promise.resolve();
2022
2067
  }
2023
2068
 
2024
- if (typeof stopListening === 'function') {
2025
- stopListening();
2069
+ if (typeof stopRecording === 'function') {
2070
+ stopRecording();
2026
2071
  }
2027
2072
 
2028
2073
  setMemoriTyping(true);
@@ -2037,7 +2082,7 @@ const MemoriWidget = ({
2037
2082
  setMemoriTyping(false);
2038
2083
  throw error;
2039
2084
  });
2040
- }
2085
+ };
2041
2086
  /**
2042
2087
  * Integrated solution for translating dialog state and speaking
2043
2088
  * This uses promise chaining for reliable sequencing without timeouts
@@ -2096,30 +2141,6 @@ const MemoriWidget = ({
2096
2141
  ]
2097
2142
  );
2098
2143
 
2099
- // Helper function for fallback behavior
2100
- const handleFallback = (text: string) => {
2101
- if (defaultEnableAudio) {
2102
- window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
2103
- }
2104
- cleanup();
2105
- };
2106
-
2107
- const cleanup = () => {
2108
- if (recognizer) {
2109
- recognizer.stopContinuousRecognitionAsync();
2110
- recognizer.close();
2111
- recognizer = null;
2112
- }
2113
-
2114
- if (speechSynthesizerRef.current) {
2115
- speechSynthesizerRef.current.close();
2116
- speechSynthesizerRef.current = null;
2117
- }
2118
-
2119
- setListening(false);
2120
- clearListeningTimeout();
2121
- };
2122
-
2123
2144
  /**
2124
2145
  * Funzione stopAudio che sostituisce quella originale
2125
2146
  */
@@ -2149,344 +2170,8 @@ const MemoriWidget = ({
2149
2170
  // eslint-disable-next-line react-hooks/exhaustive-deps
2150
2171
  }, [currentDialogState?.emission]);
2151
2172
 
2152
- const [transcript, setTranscript] = useState('');
2153
- const [transcriptTimeout, setTranscriptTimeout] =
2154
- useState<NodeJS.Timeout | null>(null);
2155
- const [isSpeaking, setIsSpeaking] = useState(false);
2156
- // const [isProcessingSTT, setIsProcessingSTT] = useState(false);
2157
-
2158
- const resetTranscript = () => {
2159
- setTranscript('');
2160
- };
2161
- // Modify setListeningTimeout to be more robust
2162
- const setListeningTimeout = () => {
2163
- clearListeningTimeout(); // Clear any existing timeout
2164
-
2165
- console.debug('Setting speech processing timeout');
2166
- const timeout = setTimeout(() => {
2167
- console.debug('Speech timeout triggered, processing transcript');
2168
- handleTranscriptProcessing();
2169
- }, continuousSpeechTimeout * 1000 + 300);
2170
-
2171
- setTranscriptTimeout(timeout as unknown as NodeJS.Timeout);
2172
- };
2173
-
2174
- const clearListeningTimeout = () => {
2175
- if (transcriptTimeout) {
2176
- console.debug('Clearing transcript timeout');
2177
- clearTimeout(transcriptTimeout);
2178
- setTranscriptTimeout(null);
2179
- }
2180
- };
2181
-
2182
- // Add safety check in resetListeningTimeout
2183
- const resetListeningTimeout = () => {
2184
- clearListeningTimeout();
2185
- if (continuousSpeech && !isProcessingSTT) {
2186
- console.debug('Setting new listening timeout');
2187
- setListeningTimeout();
2188
- }
2189
- };
2190
-
2191
- // Make sure only one path can trigger message sending
2192
- useEffect(() => {
2193
- if (!isSpeaking && transcript && transcript.length > 0) {
2194
- console.debug('Transcript updated while not speaking, resetting timeout');
2195
- resetListeningTimeout();
2196
- resetInteractionTimeout();
2197
- }
2198
- }, [transcript, isSpeaking]);
2199
-
2200
- // Clean up function for component unmount
2201
- useEffect(() => {
2202
- return () => {
2203
- clearListeningTimeout();
2204
- };
2205
- }, []);
2206
-
2207
- /**
2208
- * Listening methods
2209
- */
2210
- let microphoneStream: MediaStream | null = null;
2211
- // Modify startListening to ensure full cleanup before starting
2212
- const startListening = async (): Promise<void> => {
2213
- console.debug('Starting speech recognition...');
2214
-
2215
- // if (!AZURE_COGNITIVE_SERVICES_TTS_KEY) {
2216
- // console.error('No TTS key available');
2217
- // throw new Error('No TTS key available');
2218
- // }
2219
-
2220
- if (!sessionId) {
2221
- console.error('No session ID available');
2222
- throw new Error('No session ID available');
2223
- }
2224
-
2225
- // First, ensure any existing recognizer is fully closed
2226
- if (recognizer) {
2227
- console.debug('Cleaning up existing recognizer...');
2228
- try {
2229
- // Stop the recognizer properly
2230
- await new Promise<void>((resolve, _) => {
2231
- recognizer?.stopContinuousRecognitionAsync(resolve, error => {
2232
- console.error('Error stopping recognition:', error);
2233
- resolve(); // Resolve anyway to continue cleanup
2234
- });
2235
- });
2236
-
2237
- console.debug('Closing existing recognizer...');
2238
- recognizer.close();
2239
- recognizer = null;
2240
- } catch (error) {
2241
- console.error('Error during recognizer cleanup:', error);
2242
- // Continue with initialization anyway
2243
- }
2244
- }
2245
-
2246
- // Clear any existing state
2247
- console.debug('Resetting transcript and STT state...');
2248
- resetTranscript();
2249
- setIsProcessingSTT(false);
2250
-
2251
- // Add a small delay to ensure Azure services have time to release resources
2252
- console.debug('Adding delay for Azure services cleanup...');
2253
- await new Promise(resolve => setTimeout(resolve, 500));
2254
-
2255
- try {
2256
- console.debug('Requesting microphone access...');
2257
- // Release previous microphone stream if it exists
2258
- if (microphoneStream) {
2259
- microphoneStream.getTracks().forEach(track => track.stop());
2260
- microphoneStream = null;
2261
- }
2262
-
2263
- const stream = await navigator.mediaDevices.getUserMedia({
2264
- audio: true,
2265
- });
2266
- setHasUserActivatedListening(true);
2267
-
2268
- // Recreate speech config each time
2269
- console.debug('Setting up speech config...');
2270
- // speechConfig = setupSpeechConfig(AZURE_COGNITIVE_SERVICES_TTS_KEY);
2271
-
2272
- console.debug('Creating audio config and recognizer...');
2273
- const audioConfig = speechSdk.AudioConfig.fromDefaultMicrophoneInput();
2274
- recognizer = new speechSdk.SpeechRecognizer(speechConfig, audioConfig);
2275
-
2276
- // Set up recognizer event handlers
2277
- console.debug('Setting up recognizer handlers...');
2278
- setupRecognizerHandlers(recognizer);
2279
-
2280
- // Start recognition - use promises for better error handling
2281
- console.debug('Starting continuous recognition...');
2282
- await new Promise<void>((resolve, reject) => {
2283
- recognizer?.startContinuousRecognitionAsync(resolve, error => {
2284
- console.error('Failed to start recognition:', error);
2285
- reject(error);
2286
- });
2287
- });
2288
-
2289
- console.debug('Speech recognition started successfully');
2290
- setListening(true);
2291
- } catch (error) {
2292
- console.error('Error in startListening:', error);
2293
- // Ensure cleanup happens even on error
2294
- if (recognizer) {
2295
- console.debug('Cleaning up recognizer after error...');
2296
- recognizer.close();
2297
- recognizer = null;
2298
- }
2299
- setListening(false);
2300
- throw error;
2301
- }
2302
- };
2303
-
2304
- // Store startListening in ref for use in onEndSpeakStartListen
2305
- startListeningRef.current = startListening;
2306
-
2307
- // Define proper onEndSpeakStartListen after startListening is available
2308
- const onEndSpeakStartListen = useCallback(
2309
- (_e?: Event) => {
2310
- if (isPlayingAudio && speechSynthesizerRef.current) {
2311
- speechSynthesizerRef.current.close();
2312
- speechSynthesizerRef.current = null;
2313
- }
2314
- if (
2315
- continuousSpeech &&
2316
- (hasUserActivatedListening || !requestedListening)
2317
- ) {
2318
- setRequestedListening(true);
2319
- if (startListeningRef.current) {
2320
- startListeningRef.current();
2321
- }
2322
- }
2323
- },
2324
- [continuousSpeech, hasUserActivatedListening, isPlayingAudio, requestedListening]
2325
- );
2326
-
2327
- const setupSpeechConfig = (AZURE_COGNITIVE_SERVICES_TTS_KEY: string) => {
2328
- console.debug('Creating speech config...');
2329
- speechConfig = speechSdk.SpeechConfig.fromSubscription(
2330
- AZURE_COGNITIVE_SERVICES_TTS_KEY,
2331
- 'westeurope'
2332
- );
2333
- console.debug('Setting speech recognition language:', userLang);
2334
- speechConfig.speechRecognitionLanguage = getCultureCodeByLanguage(userLang);
2335
- speechConfig.speechSynthesisLanguage = getCultureCodeByLanguage(userLang);
2336
- speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
2337
- return speechConfig;
2338
- };
2339
-
2340
- const [isProcessingSTT, setIsProcessingSTT] = useState(false);
2341
-
2342
- const setupRecognizerHandlers = (recognizer: speechSdk.SpeechRecognizer) => {
2343
- if (recognizer) {
2344
- console.debug('Setting up recognizer event handlers...');
2345
- recognizer.recognized = (_, event) => {
2346
- // Process the recognized speech result
2347
- console.debug('Recognition event received');
2348
- handleRecognizedSpeech(event.result.text);
2349
- };
2350
-
2351
- // Configure speech recognition properties directly on the recognizer
2352
- console.debug('Configuring recognizer properties...');
2353
- recognizer.properties.setProperty(
2354
- 'SpeechServiceResponse_JsonResult',
2355
- 'true'
2356
- );
2357
-
2358
- recognizer.properties.setProperty(
2359
- 'SpeechServiceConnection_NoiseSuppression',
2360
- 'true'
2361
- );
2362
-
2363
- recognizer.properties.setProperty(
2364
- 'SpeechServiceConnection_SNRThresholdDb',
2365
- '10.0'
2366
- );
2367
- }
2368
- };
2369
-
2370
- // Add a mutex-like flag to prevent duplicate processing
2371
- let isProcessingSpeech = false;
2372
-
2373
- // Create a single, centralized function to process and send messages
2374
- const processSpeechAndSendMessage = (text: string) => {
2375
- // Skip if already processing or no text
2376
- if (isProcessingSpeech || !text || text.trim().length === 0) {
2377
- console.debug(
2378
- 'Skipping speech processing: already processing or empty text'
2379
- );
2380
- return;
2381
- }
2382
-
2383
- try {
2384
- // Set processing flag immediately
2385
- isProcessingSpeech = true;
2386
-
2387
- // Process the text
2388
- const message = stripDuplicates(text);
2389
- console.debug('Processing speech message:', message);
2390
-
2391
- if (message.length > 0) {
2392
- // Update UI states
2393
- setIsProcessingSTT(true);
2394
- setUserMessage('');
2395
-
2396
- // Send the message
2397
- console.debug('Sending message:', message);
2398
- sendMessage(message);
2399
-
2400
- // Reset states
2401
- resetTranscript();
2402
- clearListening();
2403
- }
2404
- } finally {
2405
- // Reset processing flag after a short delay to prevent race conditions
2406
- setTimeout(() => {
2407
- isProcessingSpeech = false;
2408
- }, 1000);
2409
- }
2410
- };
2411
-
2412
- // Update handleRecognizedSpeech to use the centralized function
2413
- const handleRecognizedSpeech = (text: string) => {
2414
- console.debug('Speech recognized:', text);
2415
- setTranscript(text);
2416
- setIsSpeaking(false);
2417
-
2418
- // Don't process here - wait for timeout or explicit processing
2419
- if (!continuousSpeech) {
2420
- // For manual mode, process immediately
2421
- processSpeechAndSendMessage(text);
2422
- }
2423
- // For continuous mode, rely on the timeout
2424
- };
2425
-
2426
- // Update handleTranscriptProcessing to use the centralized function
2427
- const handleTranscriptProcessing = () => {
2428
- if (transcript && transcript.length > 0 && listening) {
2429
- processSpeechAndSendMessage(transcript);
2430
- } else if (listening) {
2431
- resetInteractionTimeout();
2432
- }
2433
- };
2434
-
2435
- /**
2436
- * Stops the speech recognition process
2437
- * Closes recognizer and cleans up resources
2438
- */
2439
- // Similarly, modify stopListening to use promises
2440
- // Enhance stopListening to fully release resources
2441
- const stopListening = async () => {
2442
- console.debug('Stopping speech recognition');
2443
-
2444
- // Stop the recognizer
2445
- if (recognizer) {
2446
- try {
2447
- recognizer.stopContinuousRecognitionAsync();
2448
- recognizer.close();
2449
- } catch (error) {
2450
- console.error('Error stopping recognizer:', error);
2451
- }
2452
- recognizer = null;
2453
- }
2454
-
2455
- // Release the microphone stream
2456
- if (microphoneStream) {
2457
- try {
2458
- microphoneStream.getTracks().forEach(track => track.stop());
2459
- } catch (error) {
2460
- console.error('Error stopping microphone stream:', error);
2461
- }
2462
- microphoneStream = null;
2463
- }
2464
-
2465
- setListening(false);
2466
- };
2467
-
2468
- /**
2469
- * Clears all listening state and stops recognition
2470
- */
2471
- const clearListening = () => {
2472
- stopListening();
2473
- clearListeningTimeout();
2474
- setIsSpeaking(false);
2475
- };
2476
- /**
2477
- * Resets listening state and restarts recognition if currently listening
2478
- */
2479
- const resetListening = () => {
2480
- if (listening) {
2481
- clearListening();
2482
- resetTranscript();
2483
- setUserMessage('');
2484
- startListening();
2485
- }
2486
- };
2487
2173
  const resetUIEffects = () => {
2488
2174
  try {
2489
- clearListening();
2490
2175
  clearInteractionTimeout();
2491
2176
  setClickedStart(false);
2492
2177
  timeoutRef.current = undefined;
@@ -2508,10 +2193,6 @@ const MemoriWidget = ({
2508
2193
  document.removeEventListener('MemoriResetUIEffects', resetUIEffects);
2509
2194
  };
2510
2195
  }, []);
2511
- useEffect(() => {
2512
- if (currentDialogState?.state === 'Z0') clearListening();
2513
- // eslint-disable-next-line react-hooks/exhaustive-deps
2514
- }, [currentDialogState?.state]);
2515
2196
 
2516
2197
  useEffect(() => {
2517
2198
  // if memori is speaking, don't start listening
@@ -2521,15 +2202,15 @@ const MemoriWidget = ({
2521
2202
  (hasUserActivatedListening || !requestedListening) &&
2522
2203
  sessionId
2523
2204
  ) {
2524
- startListening();
2525
- } else if (isPlayingAudio && listening) {
2526
- stopListening();
2205
+ startRecording();
2206
+ } else if (isPlayingAudio && isListening) {
2207
+ stopRecording();
2527
2208
  }
2528
2209
  // eslint-disable-next-line react-hooks/exhaustive-deps
2529
2210
  }, [isPlayingAudio, hasUserActivatedListening]);
2530
2211
 
2531
2212
  useEffect(() => {
2532
- resetListening();
2213
+ stopRecording();
2533
2214
  // eslint-disable-next-line react-hooks/exhaustive-deps
2534
2215
  }, [language]);
2535
2216
 
@@ -2668,7 +2349,6 @@ const MemoriWidget = ({
2668
2349
  }, [integrationConfig, memori.avatarURL, ogImage]);
2669
2350
 
2670
2351
  const simulateUserPrompt = (text: string, translatedText?: string) => {
2671
- stopListening();
2672
2352
  stopAudio();
2673
2353
  sendMessage(text, undefined, undefined, false, translatedText);
2674
2354
  };
@@ -2699,7 +2379,6 @@ const MemoriWidget = ({
2699
2379
  memoriTextEnteredHandler(e);
2700
2380
  }, 1000);
2701
2381
  } else {
2702
- stopListening();
2703
2382
  stopAudio();
2704
2383
  sendMessage(
2705
2384
  text,
@@ -2864,13 +2543,20 @@ const MemoriWidget = ({
2864
2543
  ) {
2865
2544
  try {
2866
2545
  translatedMessages = await Promise.all(
2867
- messages.map(async m => ({
2868
- ...m,
2869
- originalText: m.text,
2870
- text: (
2871
- await getTranslation(m.text, userLang, language, baseUrl)
2872
- ).text,
2873
- }))
2546
+ messages.map(async m => {
2547
+ // If original text is present, the message is already translated
2548
+ if ('originalText' in m && m.originalText) {
2549
+ return m;
2550
+ }
2551
+ // Otherwise translate the message
2552
+ return {
2553
+ ...m,
2554
+ originalText: m.text,
2555
+ text: (
2556
+ await getTranslation(m.text, userLang, language, baseUrl)
2557
+ ).text,
2558
+ };
2559
+ })
2874
2560
  );
2875
2561
  } catch (e) {
2876
2562
  console.error('[CLICK_START] Error translating messages:', e);
@@ -3008,10 +2694,7 @@ const MemoriWidget = ({
3008
2694
  // No tag changes needed
3009
2695
  else {
3010
2696
  try {
3011
- const { chatLogs } = await getSessionChatLogs(
3012
- sessionID,
3013
- sessionID
3014
- );
2697
+ const { chatLogs } = await getSessionChatLogs(sessionID, sessionID);
3015
2698
 
3016
2699
  const messages = chatLogs?.[0]?.lines.map(
3017
2700
  (l, i) =>
@@ -3068,7 +2751,7 @@ const MemoriWidget = ({
3068
2751
  // if empty history, pick current state emission
3069
2752
  // otherwise, don't push message
3070
2753
  !!translatedMessages?.length
3071
- )
2754
+ );
3072
2755
  } else {
3073
2756
  // remove default initial message
3074
2757
  translatedMessages = [];
@@ -3087,7 +2770,7 @@ const MemoriWidget = ({
3087
2770
  userLang,
3088
2771
  undefined,
3089
2772
  false
3090
- )
2773
+ );
3091
2774
  }
3092
2775
  }
3093
2776
 
@@ -3270,7 +2953,7 @@ const MemoriWidget = ({
3270
2953
  if (!(enableAudio ?? integrationConfig?.enableAudio ?? true)) {
3271
2954
  mute = true;
3272
2955
  }
3273
-
2956
+
3274
2957
  toggleMute(mute);
3275
2958
  let microphoneMode = getLocalConfig<string>(
3276
2959
  'microphoneMode',
@@ -3316,7 +2999,10 @@ const MemoriWidget = ({
3316
2999
  avatar3dVisible,
3317
3000
  setAvatar3dVisible,
3318
3001
  hasUserActivatedSpeak,
3319
- isPlayingAudio: isPlayingAudio && !speakerMuted && (enableAudio ?? integrationConfig?.enableAudio ?? true),
3002
+ isPlayingAudio:
3003
+ isPlayingAudio &&
3004
+ !speakerMuted &&
3005
+ (enableAudio ?? integrationConfig?.enableAudio ?? true),
3320
3006
  loading: !!memoriTyping,
3321
3007
  baseUrl,
3322
3008
  apiUrl: client.constants.BACKEND_URL,
@@ -3353,7 +3039,6 @@ const MemoriWidget = ({
3353
3039
  memori,
3354
3040
  sessionID: sessionId || '',
3355
3041
  tenant,
3356
- provider: ttsProvider as 'azure' | 'openai',
3357
3042
  translateTo:
3358
3043
  isMultilanguageEnabled &&
3359
3044
  userLang.toUpperCase() !==
@@ -3393,22 +3078,21 @@ const MemoriWidget = ({
3393
3078
  attachmentsMenuOpen,
3394
3079
  setAttachmentsMenuOpen,
3395
3080
  showInputs,
3396
- showMicrophone: !!ttsProvider && (enableAudio ?? integrationConfig?.enableAudio ?? true),
3081
+ showMicrophone:
3082
+ !!ttsProvider && (enableAudio ?? integrationConfig?.enableAudio ?? true),
3397
3083
  showFunctionCache,
3398
3084
  userMessage,
3399
3085
  onChangeUserMessage,
3400
3086
  sendMessage: (msg: string, media?: (Medium & { type: string })[]) => {
3401
3087
  stopAudio();
3402
- stopListening();
3088
+ stopRecording();
3403
3089
  sendMessage(msg, media);
3404
3090
  setUserMessage('');
3405
- resetTranscript();
3406
3091
  },
3407
- stopListening: clearListening,
3408
- startListening,
3092
+ stopListening: stopRecording,
3093
+ startListening: startRecording,
3409
3094
  stopAudio,
3410
- resetTranscript,
3411
- listening,
3095
+ listening: isListening,
3412
3096
  setEnableFocusChatInput,
3413
3097
  isPlayingAudio,
3414
3098
  customMediaRenderer,
@@ -3628,26 +3312,6 @@ const MemoriWidget = ({
3628
3312
  isAvatar3d={!!integrationConfig?.avatarURL}
3629
3313
  additionalSettings={additionalSettings}
3630
3314
  speakerMuted={speakerMuted}
3631
-
3632
- />
3633
- )}
3634
-
3635
- {showChatHistoryDrawer && (
3636
- <ChatHistoryDrawer
3637
- open={!!showChatHistoryDrawer}
3638
- onClose={() => setShowChatHistoryDrawer(false)}
3639
- resumeSession={chatLog => {
3640
- setChatLogID(chatLog.chatLogID);
3641
- onClickStart(undefined, false, chatLog);
3642
- setShowChatHistoryDrawer(false);
3643
- }}
3644
- apiClient={client}
3645
- sessionId={sessionId || ''}
3646
- memori={memori}
3647
- baseUrl={baseUrl}
3648
- history={history}
3649
- apiUrl={client.constants.BACKEND_URL}
3650
- loginToken={loginToken}
3651
3315
  />
3652
3316
  )}
3653
3317
 
@@ -3667,6 +3331,9 @@ const MemoriWidget = ({
3667
3331
  history={history}
3668
3332
  apiUrl={client.constants.BACKEND_URL}
3669
3333
  loginToken={loginToken}
3334
+ language={language}
3335
+ userLang={userLang}
3336
+ isMultilanguageEnabled={isMultilanguageEnabled}
3670
3337
  />
3671
3338
  )}
3672
3339