@memori.ai/memori-react 7.16.2 → 7.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.d.ts +27 -0
- package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js +52 -0
- package/dist/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js.map +1 -0
- package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
- package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js +7 -7
- package/dist/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js.map +1 -1
- package/dist/components/Avatar/AvatarView/index.js +2 -3
- package/dist/components/Avatar/AvatarView/index.js.map +1 -1
- package/dist/components/ChatTextArea/ChatTextArea.css +55 -60
- package/dist/components/MemoriWidget/MemoriWidget.js +215 -138
- package/dist/components/MemoriWidget/MemoriWidget.js.map +1 -1
- package/dist/components/SettingsDrawer/SettingsDrawer.css +5 -0
- package/dist/components/SettingsDrawer/SettingsDrawer.d.ts +2 -1
- package/dist/components/SettingsDrawer/SettingsDrawer.js +6 -3
- package/dist/components/SettingsDrawer/SettingsDrawer.js.map +1 -1
- package/dist/components/UploadButton/UploadButton.d.ts +5 -0
- package/dist/components/UploadButton/UploadButton.js +49 -48
- package/dist/components/UploadButton/UploadButton.js.map +1 -1
- package/dist/components/ui/Slider.css +59 -44
- package/dist/context/visemeContext.d.ts +1 -1
- package/dist/context/visemeContext.js +2 -2
- package/dist/context/visemeContext.js.map +1 -1
- package/dist/locales/de.json +1 -0
- package/dist/locales/en.json +1 -0
- package/dist/locales/es.json +1 -0
- package/dist/locales/fr.json +1 -0
- package/dist/locales/it.json +1 -0
- package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.d.ts +27 -0
- package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js +48 -0
- package/esm/components/Avatar/AvatarView/AvatarComponent/lights/Lights.js.map +1 -0
- package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
- package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js +7 -7
- package/esm/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.js.map +1 -1
- package/esm/components/Avatar/AvatarView/index.js +3 -4
- package/esm/components/Avatar/AvatarView/index.js.map +1 -1
- package/esm/components/ChatTextArea/ChatTextArea.css +55 -60
- package/esm/components/MemoriWidget/MemoriWidget.js +216 -139
- package/esm/components/MemoriWidget/MemoriWidget.js.map +1 -1
- package/esm/components/SettingsDrawer/SettingsDrawer.css +5 -0
- package/esm/components/SettingsDrawer/SettingsDrawer.d.ts +2 -1
- package/esm/components/SettingsDrawer/SettingsDrawer.js +6 -3
- package/esm/components/SettingsDrawer/SettingsDrawer.js.map +1 -1
- package/esm/components/UploadButton/UploadButton.d.ts +5 -0
- package/esm/components/UploadButton/UploadButton.js +50 -49
- package/esm/components/UploadButton/UploadButton.js.map +1 -1
- package/esm/components/ui/Slider.css +59 -44
- package/esm/context/visemeContext.d.ts +1 -1
- package/esm/context/visemeContext.js +2 -2
- package/esm/context/visemeContext.js.map +1 -1
- package/esm/locales/de.json +1 -0
- package/esm/locales/en.json +1 -0
- package/esm/locales/es.json +1 -0
- package/esm/locales/fr.json +1 -0
- package/esm/locales/it.json +1 -0
- package/package.json +1 -2
- package/src/components/Avatar/AvatarView/AvatarComponent/lights/Lights.tsx +145 -0
- package/src/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.css +19 -7
- package/src/components/Avatar/AvatarView/AvatarComponent/positionControls/positionControls.tsx +6 -14
- package/src/components/Avatar/AvatarView/index.tsx +5 -14
- package/src/components/ChatTextArea/ChatTextArea.css +55 -60
- package/src/components/MemoriWidget/MemoriWidget.tsx +337 -187
- package/src/components/SettingsDrawer/SettingsDrawer.css +5 -0
- package/src/components/SettingsDrawer/SettingsDrawer.tsx +29 -11
- package/src/components/UploadButton/UploadButton.tsx +139 -118
- package/src/components/UploadButton/__snapshots__/UploadButton.test.tsx.snap +3 -52
- package/src/components/ui/Slider.css +59 -44
- package/src/context/visemeContext.tsx +2 -2
- package/src/locales/de.json +1 -0
- package/src/locales/en.json +1 -0
- package/src/locales/es.json +1 -0
- package/src/locales/fr.json +1 -0
- package/src/locales/it.json +1 -0
|
@@ -31,7 +31,11 @@ import React, {
|
|
|
31
31
|
} from 'react';
|
|
32
32
|
import { useTranslation } from 'react-i18next';
|
|
33
33
|
import memoriApiClient from '@memori.ai/memori-api-client';
|
|
34
|
-
import {
|
|
34
|
+
import {
|
|
35
|
+
AudioContext,
|
|
36
|
+
IAudioBufferSourceNode,
|
|
37
|
+
IAudioContext,
|
|
38
|
+
} from 'standardized-audio-context';
|
|
35
39
|
import * as speechSdk from 'microsoft-cognitiveservices-speech-sdk';
|
|
36
40
|
import cx from 'classnames';
|
|
37
41
|
import { DateTime } from 'luxon';
|
|
@@ -596,7 +600,6 @@ const MemoriWidget = ({
|
|
|
596
600
|
} = useViseme();
|
|
597
601
|
|
|
598
602
|
useEffect(() => {
|
|
599
|
-
setIsPlayingAudio(!!speechSynthesizer);
|
|
600
603
|
memoriSpeaking = !!speechSynthesizer;
|
|
601
604
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
602
605
|
}, [speechSynthesizer]);
|
|
@@ -624,20 +627,17 @@ const MemoriWidget = ({
|
|
|
624
627
|
defaultControlsPosition = 'bottom';
|
|
625
628
|
}
|
|
626
629
|
|
|
627
|
-
|
|
628
|
-
autoStart ||
|
|
629
|
-
getLocalConfig(
|
|
630
|
-
'muteSpeaker',
|
|
631
|
-
!defaultEnableAudio || !defaultSpeakerActive || autoStart
|
|
632
|
-
)
|
|
633
|
-
);
|
|
634
|
-
speakerMuted =
|
|
630
|
+
const muteSpeaker =
|
|
635
631
|
autoStart ||
|
|
636
632
|
getLocalConfig(
|
|
637
633
|
'muteSpeaker',
|
|
638
634
|
!defaultEnableAudio || !defaultSpeakerActive || autoStart
|
|
639
635
|
);
|
|
640
|
-
|
|
636
|
+
|
|
637
|
+
setMuteSpeaker(muteSpeaker);
|
|
638
|
+
speakerMuted = muteSpeaker;
|
|
639
|
+
|
|
640
|
+
setContinuousSpeech(muteSpeaker ? false : microphoneMode === 'CONTINUOUS');
|
|
641
641
|
setContinuousSpeechTimeout(getLocalConfig('continuousSpeechTimeout', 2));
|
|
642
642
|
setControlsPosition(
|
|
643
643
|
getLocalConfig('controlsPosition', defaultControlsPosition)
|
|
@@ -1971,15 +1971,24 @@ const MemoriWidget = ({
|
|
|
1971
1971
|
};
|
|
1972
1972
|
|
|
1973
1973
|
const speak = (text: string): void => {
|
|
1974
|
+
console.debug('speak called with text:', text);
|
|
1975
|
+
|
|
1974
1976
|
if (!AZURE_COGNITIVE_SERVICES_TTS_KEY || preview) {
|
|
1977
|
+
console.debug('No TTS key or preview mode, emitting end speak event');
|
|
1975
1978
|
emitEndSpeakEvent();
|
|
1976
1979
|
return;
|
|
1977
1980
|
}
|
|
1981
|
+
|
|
1982
|
+
console.debug('Stopping listening before speaking');
|
|
1978
1983
|
stopListening();
|
|
1979
1984
|
|
|
1980
|
-
if (preview)
|
|
1985
|
+
if (preview) {
|
|
1986
|
+
console.debug('Preview mode, returning early');
|
|
1987
|
+
return;
|
|
1988
|
+
}
|
|
1981
1989
|
|
|
1982
1990
|
if (speakerMuted) {
|
|
1991
|
+
console.debug('Speaker muted, skipping speech synthesis');
|
|
1983
1992
|
memoriSpeaking = false;
|
|
1984
1993
|
setMemoriTyping(false);
|
|
1985
1994
|
|
|
@@ -1987,29 +1996,40 @@ const MemoriWidget = ({
|
|
|
1987
1996
|
|
|
1988
1997
|
// trigger start continuous listening if set, see MemoriChat
|
|
1989
1998
|
if (continuousSpeech) {
|
|
1999
|
+
console.debug('Setting listening timeout for continuous speech');
|
|
1990
2000
|
setListeningTimeout();
|
|
1991
2001
|
}
|
|
1992
2002
|
return;
|
|
1993
2003
|
}
|
|
1994
2004
|
|
|
1995
|
-
if (audioDestination)
|
|
2005
|
+
if (audioDestination) {
|
|
2006
|
+
console.debug('Pausing existing audio destination');
|
|
2007
|
+
audioDestination.pause();
|
|
2008
|
+
}
|
|
1996
2009
|
|
|
1997
2010
|
let isSafari =
|
|
1998
2011
|
window.navigator.userAgent.includes('Safari') &&
|
|
1999
2012
|
!window.navigator.userAgent.includes('Chrome');
|
|
2000
2013
|
let isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent);
|
|
2001
2014
|
|
|
2015
|
+
console.debug('Browser detection - Safari:', isSafari, 'iOS:', isIOS);
|
|
2016
|
+
|
|
2002
2017
|
if ((audioContext.state as string) === 'interrupted') {
|
|
2018
|
+
console.debug('Audio context interrupted, attempting resume');
|
|
2003
2019
|
audioContext.resume().then(() => speak(text));
|
|
2004
2020
|
return;
|
|
2005
2021
|
}
|
|
2006
2022
|
if (audioContext.state === 'closed') {
|
|
2023
|
+
console.debug('Audio context closed, creating new context');
|
|
2007
2024
|
audioContext = new AudioContext();
|
|
2008
2025
|
let buffer = audioContext.createBuffer(1, 10000, 22050);
|
|
2009
2026
|
let source = audioContext.createBufferSource();
|
|
2010
2027
|
source.buffer = buffer;
|
|
2011
2028
|
source.connect(audioContext.destination);
|
|
2012
2029
|
} else if (audioContext.state === 'suspended') {
|
|
2030
|
+
console.debug(
|
|
2031
|
+
'Audio context suspended, stopping audio and creating new context'
|
|
2032
|
+
);
|
|
2013
2033
|
stopAudio();
|
|
2014
2034
|
|
|
2015
2035
|
audioContext = new AudioContext();
|
|
@@ -2020,23 +2040,17 @@ const MemoriWidget = ({
|
|
|
2020
2040
|
}
|
|
2021
2041
|
|
|
2022
2042
|
if (!speechSynthesizer) {
|
|
2023
|
-
|
|
2024
|
-
audioDestination = new speechSdk.SpeakerAudioDestination();
|
|
2025
|
-
}
|
|
2026
|
-
let audioConfig =
|
|
2027
|
-
speechSdk.AudioConfig.fromSpeakerOutput(audioDestination);
|
|
2028
|
-
speechSynthesizer = new speechSdk.SpeechSynthesizer(
|
|
2029
|
-
speechConfig,
|
|
2030
|
-
audioConfig
|
|
2031
|
-
);
|
|
2043
|
+
initializeTTS();
|
|
2032
2044
|
}
|
|
2033
2045
|
|
|
2034
2046
|
const source = audioContext.createBufferSource();
|
|
2035
2047
|
source.addEventListener('ended', () => {
|
|
2048
|
+
console.debug('Audio source ended');
|
|
2036
2049
|
setIsPlayingAudio(false);
|
|
2037
2050
|
memoriSpeaking = false;
|
|
2038
2051
|
});
|
|
2039
2052
|
audioDestination.onAudioEnd = () => {
|
|
2053
|
+
console.debug('Audio destination ended');
|
|
2040
2054
|
setIsPlayingAudio(false);
|
|
2041
2055
|
memoriSpeaking = false;
|
|
2042
2056
|
source.disconnect();
|
|
@@ -2048,100 +2062,134 @@ const MemoriWidget = ({
|
|
|
2048
2062
|
};
|
|
2049
2063
|
|
|
2050
2064
|
// Clear any existing visemes before starting new speech
|
|
2065
|
+
console.debug('Resetting viseme queue');
|
|
2051
2066
|
resetVisemeQueue();
|
|
2052
2067
|
|
|
2053
2068
|
// Set up the viseme event handler
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2069
|
+
if (speechSynthesizer) {
|
|
2070
|
+
speechSynthesizer.visemeReceived = function (_, e) {
|
|
2071
|
+
console.debug(
|
|
2072
|
+
'Viseme received:',
|
|
2073
|
+
e.visemeId,
|
|
2074
|
+
'at offset:',
|
|
2075
|
+
e.audioOffset
|
|
2076
|
+
);
|
|
2077
|
+
addViseme(e.visemeId, e.audioOffset);
|
|
2078
|
+
};
|
|
2079
|
+
}
|
|
2057
2080
|
|
|
2058
2081
|
// Set up viseme handling
|
|
2059
2082
|
const textToSpeak = escapeHTML(
|
|
2060
2083
|
stripMarkdown(stripEmojis(stripHTML(stripOutputTags(text))))
|
|
2061
2084
|
);
|
|
2085
|
+
console.debug('Processed text to speak:', textToSpeak);
|
|
2062
2086
|
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2087
|
+
setTimeout(() => {
|
|
2088
|
+
if (speechSynthesizer) {
|
|
2089
|
+
console.debug('Starting speech synthesis');
|
|
2090
|
+
speechSynthesizer.speakSsmlAsync(
|
|
2091
|
+
`<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" xml:lang="${getCultureCodeByLanguage(
|
|
2092
|
+
userLang
|
|
2093
|
+
)}"><voice name="${getTTSVoice(
|
|
2094
|
+
userLang
|
|
2095
|
+
)}"><s>${replaceTextWithPhonemes(
|
|
2096
|
+
textToSpeak,
|
|
2097
|
+
userLang.toLowerCase()
|
|
2098
|
+
)}</s></voice></speak>`,
|
|
2099
|
+
result => {
|
|
2100
|
+
if (result) {
|
|
2101
|
+
console.debug('Speech synthesis successful');
|
|
2102
|
+
setIsPlayingAudio(true);
|
|
2103
|
+
memoriSpeaking = true;
|
|
2104
|
+
|
|
2105
|
+
// Process the viseme data
|
|
2106
|
+
startProcessing(audioContext);
|
|
2077
2107
|
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2108
|
+
try {
|
|
2109
|
+
// Decode the audio data
|
|
2110
|
+
console.debug('Decoding audio data');
|
|
2111
|
+
audioContext.decodeAudioData(
|
|
2112
|
+
result.audioData,
|
|
2113
|
+
function (buffer) {
|
|
2114
|
+
console.debug('Audio data decoded successfully');
|
|
2115
|
+
source.buffer = buffer;
|
|
2116
|
+
source.connect(audioContext.destination);
|
|
2117
|
+
|
|
2118
|
+
if (history.length < 1 || (isSafari && isIOS)) {
|
|
2119
|
+
console.debug('Starting audio playback');
|
|
2120
|
+
source.start(0);
|
|
2121
|
+
}
|
|
2122
|
+
}
|
|
2123
|
+
);
|
|
2083
2124
|
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2125
|
+
// Handle the audio context state changes
|
|
2126
|
+
audioContext.onstatechange = () => {
|
|
2127
|
+
console.debug(
|
|
2128
|
+
'Audio context state changed to:',
|
|
2129
|
+
audioContext.state
|
|
2130
|
+
);
|
|
2131
|
+
if (
|
|
2132
|
+
audioContext.state === 'suspended' ||
|
|
2133
|
+
audioContext.state === 'closed'
|
|
2134
|
+
) {
|
|
2135
|
+
source.disconnect();
|
|
2136
|
+
setIsPlayingAudio(false);
|
|
2137
|
+
stopProcessing();
|
|
2138
|
+
resetVisemeQueue();
|
|
2139
|
+
memoriSpeaking = false;
|
|
2140
|
+
} else if ((audioContext.state as string) === 'interrupted') {
|
|
2141
|
+
audioContext.resume();
|
|
2142
|
+
}
|
|
2143
|
+
};
|
|
2088
2144
|
|
|
2089
|
-
// Handle the audio context state changes
|
|
2090
|
-
audioContext.onstatechange = () => {
|
|
2091
|
-
if (
|
|
2092
|
-
audioContext.state === 'suspended' ||
|
|
2093
|
-
audioContext.state === 'closed'
|
|
2094
|
-
) {
|
|
2095
|
-
source.disconnect();
|
|
2096
|
-
setIsPlayingAudio(false);
|
|
2097
|
-
stopProcessing();
|
|
2098
|
-
resetVisemeQueue();
|
|
2099
|
-
memoriSpeaking = false;
|
|
2100
|
-
} else if ((audioContext.state as string) === 'interrupted') {
|
|
2101
2145
|
audioContext.resume();
|
|
2102
|
-
}
|
|
2103
|
-
};
|
|
2104
|
-
|
|
2105
|
-
audioContext.resume();
|
|
2106
2146
|
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2147
|
+
if (speechSynthesizer) {
|
|
2148
|
+
console.debug('Closing speech synthesizer');
|
|
2149
|
+
speechSynthesizer.close();
|
|
2150
|
+
speechSynthesizer = null;
|
|
2151
|
+
}
|
|
2152
|
+
} catch (error) {
|
|
2153
|
+
console.error('Error processing audio data:', error);
|
|
2154
|
+
handleFallback(text);
|
|
2155
|
+
}
|
|
2156
|
+
} else {
|
|
2157
|
+
console.debug('No result from speech synthesis, using fallback');
|
|
2158
|
+
handleFallback(text);
|
|
2110
2159
|
}
|
|
2111
|
-
}
|
|
2112
|
-
|
|
2160
|
+
},
|
|
2161
|
+
error => {
|
|
2162
|
+
console.error('Speak error:', error);
|
|
2113
2163
|
handleFallback(text);
|
|
2114
2164
|
}
|
|
2115
|
-
|
|
2116
|
-
handleFallback(text);
|
|
2117
|
-
}
|
|
2118
|
-
},
|
|
2119
|
-
error => {
|
|
2120
|
-
console.error('Speak error:', error);
|
|
2121
|
-
handleFallback(text);
|
|
2165
|
+
);
|
|
2122
2166
|
}
|
|
2123
|
-
);
|
|
2124
|
-
|
|
2167
|
+
}, 100);
|
|
2125
2168
|
setMemoriTyping(false);
|
|
2126
2169
|
};
|
|
2127
2170
|
|
|
2128
2171
|
// Helper function for fallback behavior
|
|
2129
2172
|
const handleFallback = (text: string) => {
|
|
2130
|
-
console.log('Falling back to browser speech synthesis');
|
|
2131
2173
|
window.speechSynthesis.speak(new SpeechSynthesisUtterance(text));
|
|
2132
2174
|
cleanup();
|
|
2133
2175
|
};
|
|
2134
2176
|
|
|
2135
|
-
|
|
2136
|
-
const cleanup = () => {
|
|
2177
|
+
const cleanup = (): void => {
|
|
2137
2178
|
setIsPlayingAudio(false);
|
|
2138
2179
|
stopProcessing();
|
|
2139
2180
|
resetVisemeQueue();
|
|
2140
2181
|
memoriSpeaking = false;
|
|
2141
2182
|
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2183
|
+
try {
|
|
2184
|
+
if (speechSynthesizer) {
|
|
2185
|
+
const currentSynthesizer = speechSynthesizer;
|
|
2186
|
+
speechSynthesizer = null; // Clear reference first
|
|
2187
|
+
console.debug('Closing speech synthesizer');
|
|
2188
|
+
currentSynthesizer.close();
|
|
2189
|
+
}
|
|
2190
|
+
} catch (error) {
|
|
2191
|
+
console.debug('Error during synthesizer cleanup:', error);
|
|
2192
|
+
// Even if close fails, ensure synthesizer is nullified
|
|
2145
2193
|
speechSynthesizer = null;
|
|
2146
2194
|
}
|
|
2147
2195
|
|
|
@@ -2149,18 +2197,25 @@ const MemoriWidget = ({
|
|
|
2149
2197
|
};
|
|
2150
2198
|
|
|
2151
2199
|
// Modify stopAudio to include speech state reset
|
|
2152
|
-
const stopAudio = () => {
|
|
2200
|
+
const stopAudio = async (): Promise<void> => {
|
|
2153
2201
|
setIsPlayingAudio(false);
|
|
2154
2202
|
memoriSpeaking = false;
|
|
2155
2203
|
|
|
2156
2204
|
try {
|
|
2157
2205
|
if (speechSynthesizer) {
|
|
2158
|
-
speechSynthesizer
|
|
2206
|
+
const currentSynthesizer = speechSynthesizer;
|
|
2159
2207
|
speechSynthesizer = null;
|
|
2208
|
+
try {
|
|
2209
|
+
currentSynthesizer.close();
|
|
2210
|
+
} catch (e) {
|
|
2211
|
+
console.debug('Error closing speech synthesizer:', e);
|
|
2212
|
+
}
|
|
2160
2213
|
}
|
|
2161
|
-
|
|
2214
|
+
|
|
2215
|
+
if (audioContext?.state !== 'closed') {
|
|
2162
2216
|
audioContext.close();
|
|
2163
2217
|
}
|
|
2218
|
+
|
|
2164
2219
|
if (audioDestination) {
|
|
2165
2220
|
audioDestination.pause();
|
|
2166
2221
|
audioDestination.close();
|
|
@@ -2169,7 +2224,6 @@ const MemoriWidget = ({
|
|
|
2169
2224
|
console.debug('stopAudio error: ', e);
|
|
2170
2225
|
}
|
|
2171
2226
|
};
|
|
2172
|
-
|
|
2173
2227
|
const focusChatInput = () => {
|
|
2174
2228
|
let textarea = document.querySelector(
|
|
2175
2229
|
'#chat-fieldset textarea'
|
|
@@ -2192,126 +2246,192 @@ const MemoriWidget = ({
|
|
|
2192
2246
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
2193
2247
|
}, [currentDialogState?.emission]);
|
|
2194
2248
|
|
|
2195
|
-
/**
|
|
2196
|
-
* Speech recognition and transcript management
|
|
2197
|
-
*/
|
|
2198
2249
|
const [transcript, setTranscript] = useState('');
|
|
2199
|
-
const resetTranscript = () => setTranscript('');
|
|
2200
|
-
|
|
2201
|
-
/**
|
|
2202
|
-
* Listening transcript timeout
|
|
2203
|
-
*/
|
|
2204
2250
|
const [transcriptTimeout, setTranscriptTimeout] =
|
|
2205
2251
|
useState<NodeJS.Timeout | null>(null);
|
|
2252
|
+
const [isSpeaking, setIsSpeaking] = useState(false);
|
|
2253
|
+
// const [isProcessingSTT, setIsProcessingSTT] = useState(false);
|
|
2254
|
+
|
|
2255
|
+
const resetTranscript = () => {
|
|
2256
|
+
setTranscript('');
|
|
2257
|
+
// setIsProcessingSTT(false);
|
|
2258
|
+
};
|
|
2259
|
+
|
|
2206
2260
|
const setListeningTimeout = () => {
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
setUserMessage('');
|
|
2214
|
-
} else if (listening) {
|
|
2215
|
-
resetInteractionTimeout();
|
|
2216
|
-
}
|
|
2217
|
-
}, continuousSpeechTimeout * 1000);
|
|
2218
|
-
setTranscriptTimeout(timeout);
|
|
2261
|
+
clearListeningTimeout();
|
|
2262
|
+
const timeout = setTimeout(
|
|
2263
|
+
handleTranscriptProcessing,
|
|
2264
|
+
continuousSpeechTimeout * 1000 + 300
|
|
2265
|
+
);
|
|
2266
|
+
setTranscriptTimeout(timeout as unknown as NodeJS.Timeout);
|
|
2219
2267
|
};
|
|
2268
|
+
|
|
2220
2269
|
const clearListeningTimeout = () => {
|
|
2221
2270
|
if (transcriptTimeout) {
|
|
2222
2271
|
clearTimeout(transcriptTimeout);
|
|
2223
2272
|
setTranscriptTimeout(null);
|
|
2224
2273
|
}
|
|
2225
2274
|
};
|
|
2275
|
+
|
|
2226
2276
|
const resetListeningTimeout = () => {
|
|
2227
2277
|
clearListeningTimeout();
|
|
2228
|
-
if (continuousSpeech)
|
|
2278
|
+
if (continuousSpeech) {
|
|
2279
|
+
setListeningTimeout();
|
|
2280
|
+
}
|
|
2229
2281
|
};
|
|
2282
|
+
// Modified useEffect to handle transcript changes
|
|
2230
2283
|
useEffect(() => {
|
|
2231
|
-
|
|
2232
|
-
|
|
2284
|
+
if (!isSpeaking) {
|
|
2285
|
+
resetListeningTimeout();
|
|
2286
|
+
resetInteractionTimeout();
|
|
2287
|
+
}
|
|
2288
|
+
}, [transcript, isSpeaking]);
|
|
2233
2289
|
|
|
2234
|
-
|
|
2235
|
-
|
|
2290
|
+
// Clean up function for component unmount
|
|
2291
|
+
useEffect(() => {
|
|
2292
|
+
return () => {
|
|
2293
|
+
clearListeningTimeout();
|
|
2294
|
+
};
|
|
2295
|
+
}, []);
|
|
2236
2296
|
|
|
2237
2297
|
/**
|
|
2238
2298
|
* Listening methods
|
|
2239
2299
|
*/
|
|
2240
|
-
|
|
2241
|
-
|
|
2300
|
+
/**
|
|
2301
|
+
* Starts speech recognition using Azure Cognitive Services
|
|
2302
|
+
* Sets up recognizer and begins continuous recognition
|
|
2303
|
+
*/
|
|
2304
|
+
const startListening = async (): Promise<void> => {
|
|
2305
|
+
if (!AZURE_COGNITIVE_SERVICES_TTS_KEY) {
|
|
2306
|
+
throw new Error('No TTS key available');
|
|
2307
|
+
}
|
|
2242
2308
|
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2309
|
+
if (!sessionId) {
|
|
2310
|
+
throw new Error('No session ID available');
|
|
2311
|
+
}
|
|
2246
2312
|
|
|
2247
|
-
//
|
|
2248
|
-
|
|
2313
|
+
// Ensure complete cleanup before starting, if it's already listening, stop it
|
|
2314
|
+
cleanup();
|
|
2315
|
+
resetTranscript();
|
|
2249
2316
|
|
|
2250
2317
|
try {
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
.then(function (_stream) {
|
|
2254
|
-
setHasUserActivatedListening(true);
|
|
2255
|
-
|
|
2256
|
-
if (!speechConfig) {
|
|
2257
|
-
speechConfig = speechSdk.SpeechConfig.fromSubscription(
|
|
2258
|
-
AZURE_COGNITIVE_SERVICES_TTS_KEY,
|
|
2259
|
-
'westeurope'
|
|
2260
|
-
);
|
|
2261
|
-
speechConfig.speechRecognitionLanguage =
|
|
2262
|
-
getCultureCodeByLanguage(userLang);
|
|
2263
|
-
speechConfig.speechSynthesisLanguage =
|
|
2264
|
-
getCultureCodeByLanguage(userLang);
|
|
2265
|
-
speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
|
|
2266
|
-
}
|
|
2318
|
+
// Add delay to ensure previous instance is fully cleaned up
|
|
2319
|
+
// await new Promise(resolve => setTimeout(resolve, 300));
|
|
2267
2320
|
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
recognizer = new speechSdk.SpeechRecognizer(
|
|
2271
|
-
speechConfig,
|
|
2272
|
-
audioConfig
|
|
2273
|
-
);
|
|
2321
|
+
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
2322
|
+
setHasUserActivatedListening(true);
|
|
2274
2323
|
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
if (!e.result.text) return;
|
|
2278
|
-
if (e.result.reason === speechSdk.ResultReason.RecognizedSpeech) {
|
|
2279
|
-
let transcript = e.result.text;
|
|
2280
|
-
setTranscript(transcript || '');
|
|
2281
|
-
if (transcript?.length > 0) {
|
|
2282
|
-
const transcriptMessage = stripDuplicates(transcript);
|
|
2283
|
-
if (transcriptMessage.length > 0)
|
|
2284
|
-
setUserMessage(msg => `${msg} ${transcriptMessage}`);
|
|
2285
|
-
}
|
|
2286
|
-
} else if (e.result.reason === speechSdk.ResultReason.NoMatch) {
|
|
2287
|
-
console.debug('NOMATCH: Speech could not be recognized.');
|
|
2288
|
-
}
|
|
2289
|
-
};
|
|
2290
|
-
recognizer.canceled = (_s, e) => {
|
|
2291
|
-
if (e.reason === speechSdk.CancellationReason.Error) {
|
|
2292
|
-
console.debug(`"CANCELED: ErrorCode=${e.errorCode}`);
|
|
2293
|
-
console.debug(`"CANCELED: ErrorDetails=${e.errorDetails}`);
|
|
2294
|
-
console.debug(
|
|
2295
|
-
'CANCELED: Did you set the speech resource key and region values?'
|
|
2296
|
-
);
|
|
2297
|
-
}
|
|
2324
|
+
// Recreate speech config each time
|
|
2325
|
+
speechConfig = setupSpeechConfig(AZURE_COGNITIVE_SERVICES_TTS_KEY);
|
|
2298
2326
|
|
|
2299
|
-
|
|
2300
|
-
|
|
2327
|
+
const audioConfig = speechSdk.AudioConfig.fromDefaultMicrophoneInput();
|
|
2328
|
+
recognizer = new speechSdk.SpeechRecognizer(speechConfig, audioConfig);
|
|
2301
2329
|
|
|
2302
|
-
|
|
2303
|
-
|
|
2304
|
-
|
|
2330
|
+
// Set up recognizer event handlers
|
|
2331
|
+
setupRecognizerHandlers(recognizer);
|
|
2332
|
+
|
|
2333
|
+
// Start recognition
|
|
2334
|
+
setListening(true);
|
|
2335
|
+
recognizer.startContinuousRecognitionAsync();
|
|
2336
|
+
|
|
2337
|
+
recognizer.canceled = (_s, e) => {
|
|
2338
|
+
if (e.reason === speechSdk.CancellationReason.Error) {
|
|
2339
|
+
console.debug(`"CANCELED: ErrorCode=${e.errorCode}`);
|
|
2340
|
+
console.debug(`"CANCELED: ErrorDetails=${e.errorDetails}`);
|
|
2341
|
+
console.debug(
|
|
2342
|
+
'CANCELED: Did you set the speech resource key and region values?'
|
|
2343
|
+
);
|
|
2344
|
+
stopListening();
|
|
2345
|
+
cleanup();
|
|
2346
|
+
}
|
|
2305
2347
|
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2309
|
-
|
|
2348
|
+
stopListening();
|
|
2349
|
+
};
|
|
2350
|
+
|
|
2351
|
+
recognizer.sessionStopped = (_s, _e) => {
|
|
2352
|
+
stopListening();
|
|
2353
|
+
resetTranscript();
|
|
2354
|
+
};
|
|
2310
2355
|
} catch (error) {
|
|
2311
|
-
console.
|
|
2356
|
+
console.error('Error in startListening:', error);
|
|
2357
|
+
stopListening();
|
|
2358
|
+
throw error;
|
|
2359
|
+
}
|
|
2360
|
+
};
|
|
2361
|
+
|
|
2362
|
+
const setupSpeechConfig = (AZURE_COGNITIVE_SERVICES_TTS_KEY: string) => {
|
|
2363
|
+
speechConfig = speechSdk.SpeechConfig.fromSubscription(
|
|
2364
|
+
AZURE_COGNITIVE_SERVICES_TTS_KEY,
|
|
2365
|
+
'westeurope'
|
|
2366
|
+
);
|
|
2367
|
+
speechConfig.speechRecognitionLanguage = getCultureCodeByLanguage(userLang);
|
|
2368
|
+
speechConfig.speechSynthesisLanguage = getCultureCodeByLanguage(userLang);
|
|
2369
|
+
speechConfig.speechSynthesisVoiceName = getTTSVoice(userLang); // https://docs.microsoft.com/it-it/azure/cognitive-services/speech-service/language-support#text-to-speech
|
|
2370
|
+
return speechConfig;
|
|
2371
|
+
};
|
|
2372
|
+
|
|
2373
|
+
const setupRecognizerHandlers = (recognizer: speechSdk.SpeechRecognizer) => {
|
|
2374
|
+
if (recognizer) {
|
|
2375
|
+
recognizer.recognized = (_, event) => {
|
|
2376
|
+
// Process the recognized speech result
|
|
2377
|
+
handleRecognizedSpeech(event.result.text);
|
|
2378
|
+
};
|
|
2379
|
+
|
|
2380
|
+
// Configure speech recognition properties directly on the recognizer
|
|
2381
|
+
recognizer.properties.setProperty(
|
|
2382
|
+
'SpeechServiceResponse_JsonResult',
|
|
2383
|
+
'true'
|
|
2384
|
+
);
|
|
2385
|
+
|
|
2386
|
+
recognizer.properties.setProperty(
|
|
2387
|
+
'SpeechServiceConnection_NoiseSuppression',
|
|
2388
|
+
'true'
|
|
2389
|
+
);
|
|
2390
|
+
|
|
2391
|
+
recognizer.properties.setProperty(
|
|
2392
|
+
'SpeechServiceConnection_SNRThresholdDb',
|
|
2393
|
+
'10.0'
|
|
2394
|
+
);
|
|
2395
|
+
}
|
|
2396
|
+
};
|
|
2397
|
+
|
|
2398
|
+
const handleRecognizedSpeech = (text: string) => {
|
|
2399
|
+
console.debug('Handling recognized speech:', text);
|
|
2400
|
+
|
|
2401
|
+
if (!text || text.trim().length === 0) {
|
|
2402
|
+
console.debug('No valid text received from speech recognition');
|
|
2403
|
+
return;
|
|
2404
|
+
}
|
|
2405
|
+
|
|
2406
|
+
setTranscript(text);
|
|
2407
|
+
setIsSpeaking(false);
|
|
2408
|
+
|
|
2409
|
+
const message = stripDuplicates(text);
|
|
2410
|
+
console.debug('Stripped message:', message);
|
|
2411
|
+
if (message.length > 0) {
|
|
2412
|
+
setUserMessage(message);
|
|
2413
|
+
}
|
|
2414
|
+
};
|
|
2415
|
+
|
|
2416
|
+
// Helper function to handle transcript processing
|
|
2417
|
+
const handleTranscriptProcessing = () => {
|
|
2418
|
+
const message = stripDuplicates(transcript);
|
|
2419
|
+
if (message.length > 0 && listening) {
|
|
2420
|
+
sendMessage(message);
|
|
2421
|
+
resetTranscript();
|
|
2422
|
+
setUserMessage('');
|
|
2423
|
+
clearListening();
|
|
2424
|
+
} else if (listening) {
|
|
2425
|
+
resetInteractionTimeout();
|
|
2312
2426
|
}
|
|
2313
2427
|
};
|
|
2428
|
+
|
|
2429
|
+
/**
|
|
2430
|
+
* Stops the speech recognition process
|
|
2431
|
+
* Closes recognizer and cleans up resources
|
|
2432
|
+
*/
|
|
2314
2433
|
const stopListening = () => {
|
|
2434
|
+
console.debug('Stopping speech recognition');
|
|
2315
2435
|
if (recognizer) {
|
|
2316
2436
|
// Stop continuous recognition and close the recognizer
|
|
2317
2437
|
recognizer.stopContinuousRecognitionAsync();
|
|
@@ -2320,11 +2440,18 @@ const MemoriWidget = ({
|
|
|
2320
2440
|
}
|
|
2321
2441
|
setListening(false);
|
|
2322
2442
|
};
|
|
2443
|
+
|
|
2444
|
+
/**
|
|
2445
|
+
* Clears all listening state and stops recognition
|
|
2446
|
+
*/
|
|
2323
2447
|
const clearListening = () => {
|
|
2324
|
-
setHasUserActivatedListening(false);
|
|
2325
2448
|
stopListening();
|
|
2326
2449
|
clearListeningTimeout();
|
|
2450
|
+
setIsSpeaking(false);
|
|
2327
2451
|
};
|
|
2452
|
+
/**
|
|
2453
|
+
* Resets listening state and restarts recognition if currently listening
|
|
2454
|
+
*/
|
|
2328
2455
|
const resetListening = () => {
|
|
2329
2456
|
if (listening) {
|
|
2330
2457
|
clearListening();
|
|
@@ -2383,19 +2510,22 @@ const MemoriWidget = ({
|
|
|
2383
2510
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
2384
2511
|
[continuousSpeech, hasUserActivatedListening]
|
|
2385
2512
|
);
|
|
2513
|
+
|
|
2386
2514
|
useEffect(() => {
|
|
2515
|
+
// if memori is speaking, don't start listening
|
|
2387
2516
|
if (
|
|
2388
|
-
history.length > 1 &&
|
|
2389
2517
|
!isPlayingAudio &&
|
|
2390
2518
|
continuousSpeech &&
|
|
2391
|
-
(hasUserActivatedListening || !requestedListening)
|
|
2392
|
-
|
|
2519
|
+
(hasUserActivatedListening || !requestedListening) &&
|
|
2520
|
+
sessionId
|
|
2521
|
+
) {
|
|
2393
2522
|
startListening();
|
|
2394
|
-
else if (isPlayingAudio && listening) {
|
|
2523
|
+
} else if (isPlayingAudio && listening) {
|
|
2395
2524
|
stopListening();
|
|
2396
2525
|
}
|
|
2397
2526
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
2398
|
-
}, [isPlayingAudio]);
|
|
2527
|
+
}, [isPlayingAudio, hasUserActivatedListening]);
|
|
2528
|
+
|
|
2399
2529
|
useEffect(() => {
|
|
2400
2530
|
resetListening();
|
|
2401
2531
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
|
@@ -2617,11 +2747,6 @@ const MemoriWidget = ({
|
|
|
2617
2747
|
session?: { dialogState: DialogState; sessionID: string },
|
|
2618
2748
|
initialSessionExpired = false
|
|
2619
2749
|
) => {
|
|
2620
|
-
// console.log('[CLICK_START] Starting onClickStart with params:', {
|
|
2621
|
-
// hasSession: !!session,
|
|
2622
|
-
// initialSessionExpired
|
|
2623
|
-
// });
|
|
2624
|
-
|
|
2625
2750
|
const sessionID = session?.sessionID || sessionId;
|
|
2626
2751
|
const dialogState = session?.dialogState || currentDialogState;
|
|
2627
2752
|
setClickedStart(true);
|
|
@@ -2648,7 +2773,10 @@ const MemoriWidget = ({
|
|
|
2648
2773
|
'birthDate',
|
|
2649
2774
|
undefined
|
|
2650
2775
|
);
|
|
2651
|
-
let birth = birthDate || storageBirthDate ||
|
|
2776
|
+
let birth = birthDate || storageBirthDate || user?.birthDate;
|
|
2777
|
+
if (!birth && autoStart && initialSessionID)
|
|
2778
|
+
birth = '1970-01-01T10:24:03.845Z';
|
|
2779
|
+
|
|
2652
2780
|
// console.log('[CLICK_START] Using birth date:', birth);
|
|
2653
2781
|
|
|
2654
2782
|
// Handle age verification
|
|
@@ -2926,6 +3054,8 @@ const MemoriWidget = ({
|
|
|
2926
3054
|
(!!translatedMessages?.length && translatedMessages.length > 1) ||
|
|
2927
3055
|
!initialQuestion
|
|
2928
3056
|
) {
|
|
3057
|
+
console.log('[CLICK_START] Using existing chat history');
|
|
3058
|
+
|
|
2929
3059
|
// we have a history, don't push message
|
|
2930
3060
|
translateDialogState(
|
|
2931
3061
|
currentState,
|
|
@@ -2945,10 +3075,16 @@ const MemoriWidget = ({
|
|
|
2945
3075
|
setHasUserActivatedSpeak(true);
|
|
2946
3076
|
});
|
|
2947
3077
|
} else {
|
|
3078
|
+
console.log(
|
|
3079
|
+
'[CLICK_START] Using existing chat history with message from initial question'
|
|
3080
|
+
);
|
|
3081
|
+
|
|
2948
3082
|
// remove default initial message
|
|
2949
3083
|
translatedMessages = [];
|
|
2950
3084
|
setHistory([]);
|
|
2951
3085
|
|
|
3086
|
+
setMemoriTyping(true);
|
|
3087
|
+
|
|
2952
3088
|
// we have no chat history, we start by initial question
|
|
2953
3089
|
const response = await postTextEnteredEvent({
|
|
2954
3090
|
sessionId: sessionID,
|
|
@@ -2968,6 +3104,7 @@ const MemoriWidget = ({
|
|
|
2968
3104
|
}
|
|
2969
3105
|
})
|
|
2970
3106
|
.finally(() => {
|
|
3107
|
+
setMemoriTyping(false);
|
|
2971
3108
|
setHasUserActivatedSpeak(true);
|
|
2972
3109
|
});
|
|
2973
3110
|
}
|
|
@@ -3002,6 +3139,10 @@ const MemoriWidget = ({
|
|
|
3002
3139
|
|
|
3003
3140
|
useEffect(() => {
|
|
3004
3141
|
if (!clickedStart && autoStart) {
|
|
3142
|
+
// Initialize TTS before starting if AZURE_COGNITIVE_SERVICES_TTS_KEY exists
|
|
3143
|
+
if (AZURE_COGNITIVE_SERVICES_TTS_KEY && !speechSynthesizer) {
|
|
3144
|
+
initializeTTS();
|
|
3145
|
+
}
|
|
3005
3146
|
onClickStart();
|
|
3006
3147
|
}
|
|
3007
3148
|
}, [clickedStart, autoStart]);
|
|
@@ -3135,6 +3276,14 @@ const MemoriWidget = ({
|
|
|
3135
3276
|
setSpeakerMuted: mute => {
|
|
3136
3277
|
speakerMuted = !!mute;
|
|
3137
3278
|
setMuteSpeaker(mute);
|
|
3279
|
+
let microphoneMode = getLocalConfig<string>(
|
|
3280
|
+
'microphoneMode',
|
|
3281
|
+
'HOLD_TO_TALK'
|
|
3282
|
+
);
|
|
3283
|
+
if (microphoneMode === 'CONTINUOUS' && mute) {
|
|
3284
|
+
setContinuousSpeech(false);
|
|
3285
|
+
setLocalConfig('microphoneMode', 'HOLD_TO_TALK');
|
|
3286
|
+
}
|
|
3138
3287
|
setLocalConfig('muteSpeaker', !!mute);
|
|
3139
3288
|
if (mute) {
|
|
3140
3289
|
stopAudio();
|
|
@@ -3491,6 +3640,7 @@ const MemoriWidget = ({
|
|
|
3491
3640
|
setEnablePositionControls={setEnablePositionControls}
|
|
3492
3641
|
isAvatar3d={!!integrationConfig?.avatarURL}
|
|
3493
3642
|
additionalSettings={additionalSettings}
|
|
3643
|
+
speakerMuted={speakerMuted}
|
|
3494
3644
|
/>
|
|
3495
3645
|
)}
|
|
3496
3646
|
|