uneeq-js 3.13.0 → 3.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/3.index.js CHANGED
@@ -1,2 +1,2 @@
1
- "use strict";(Object("undefined"!=typeof self?self:this).webpackChunkUneeq=Object("undefined"!=typeof self?self:this).webpackChunkUneeq||[]).push([[3],{3(t,e,i){i.d(e,{DeepgramSTT:()=>p});var n=i(514),s=i(838),a=i(33),o=i(388),r=i(58),c=i(260);const h="[Deepgram STT]";var u,d;!function(t){t.FinalTranscript="Final transcript (from accumulated)",t.UtteranceEndFallback="UtteranceEnd fallback",t.SafetyNet="Safety net"}(u||(u={})),function(t){t.Idle="Idle",t.Connecting="Connecting",t.Connected="Connected",t.Paused="Paused",t.Disconnected="Disconnected"}(d||(d={}));class p{options;connection=null;state=d.Idle;shouldReconnect=!0;microphone=null;stream=null;reconnectAttempts=0;reconnectDelay=1e3;reconnectTimeoutId=null;digitalHumanSpeaking=!1;isUserCurrentlySpeaking=!1;isUiShowingSpeaking=!1;accumulatedTranscript="";accumulatedConfidenceSum=0;accumulatedWordCount=0;lastDeepgramEventTime=0;safetyNetTimeoutId=null;constructor(t){this.options=t,this.options.model=this.options.model||"nova-3",this.options.language=this.options.language||"en",this.options.smartFormat=this.options.smartFormat??!0,this.options.interimResults=this.options.interimResults??!0,this.options.utteranceEndMs=this.options.utteranceEndMs??1500,this.options.vadEvents=this.options.vadEvents??!0,this.options.fillerWords=this.options.fillerWords??!1,this.options.endpointing=this.options.endpointing??500,this.options.echoCancellation=this.options.echoCancellation??!0,this.options.noiseSuppression=this.options.noiseSuppression??!0,this.options.autoGainControl=this.options.autoGainControl??!0,this.options.interruptionWordThreshold=this.options.interruptionWordThreshold??3,this.options.noDelay=this.options.noDelay??!1,this.options.safetyNetTimeoutMs=this.options.safetyNetTimeoutMs??2e3,this.options.safetyNetTimeoutMs<=500?(n.A.warn(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. 
Ignoring and default the value to 2000ms.`),this.options.safetyNetTimeoutMs=2e3):this.options.safetyNetTimeoutMs<=1e3?n.A.warn(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is 2000ms.`):n.A.info(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms.`),this.handleAppMessages()}async startRecognition(){n.A.info(`${h} Starting speech recognition`),this.shouldReconnect=!0,this.resetReconnectionState(),await this.connect()}async stopRecognition(){n.A.info(`${h} Stopping speech recognition`),this.shouldReconnect=!1,this.clearReconnectTimeout(),await this.disconnect()}async pause(){return n.A.info(`${h} Pausing speech recognition`),this.state=d.Paused,this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),this.resetAccumulatedState(),this.resetSpeakingStates(),this.stream&&(this.stream.getTracks().forEach(t=>{t.enabled=!1}),n.A.debug(`${h} Audio tracks disabled`)),!0}async resume(){if(n.A.info(`${h} Resuming speech recognition`),this.state===d.Paused){if(this.stream)return this.state=d.Connected,this.stream.getTracks().forEach(t=>{t.enabled=!0}),n.A.debug(`${h} Audio tracks re-enabled`),!0;if(this.connection)return this.state=d.Connected,await this.startMicrophone(),!0;this.state=d.Disconnected}return n.A.debug(`${h} Initiating connection`),await this.connect(),!0}setChatMetadata(t){this.options.promptMetadata=t}async connect(){if(this.state!==d.Connected)if(this.state!==d.Connecting){this.state=d.Connecting;try{const t=await this.getToken();n.A.info(`${h} Connecting to Deepgram`);const e=new 
c.c({accessToken:t.token,baseUrl:t.api_url}),i={model:this.options.model,language:this.options.language,smart_format:String(this.options.smartFormat),interim_results:String(this.options.interimResults),utterance_end_ms:this.options.utteranceEndMs,vad_events:String(this.options.vadEvents),filler_words:String(this.options.fillerWords),endpointing:this.options.endpointing,mip_opt_out:"true",...this.options.keyterms&&this.options.keyterms.length>0&&{keyterm:this.options.keyterms},...this.options.noDelay&&{queryParams:{no_delay:"true"}}},s=e.listen;if(this.connection=await s.v1.connect(i),this.connection.connect(),await Promise.race([this.connection.waitForOpen(),new Promise((t,e)=>setTimeout(()=>e(new Error("Connection timeout")),1e4))]),this.state!==d.Paused&&(this.state=d.Connected),n.A.info(`${h} Connection opened`),this.setupEventHandlers(),this.state===d.Paused)return n.A.info(`${h} Pause requested during connection — staying paused`),void this.resetReconnectionState();await this.startMicrophone(),n.A.info(`${h} Connected successfully`),this.resetReconnectionState()}catch(t){this.state=d.Disconnected,n.A.error(`${h} Connection error`,n.A.serialiseError(t)),this.handleError(t),this.shouldReconnect&&this.scheduleReconnect()}}else n.A.warn(`${h} Connection already in progress`);else n.A.warn(`${h} Already connected`)}async disconnect(){if(this.state!==d.Idle&&(this.state!==d.Disconnected||this.connection)){n.A.info(`${h} Disconnecting`),this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null);try{this.stopMicrophone(),this.connection&&(this.connection.close(),this.connection=null)}catch(t){n.A.error(`${h} Disconnect error`,n.A.serialiseError(t))}this.resetAccumulatedState(),this.resetSpeakingStates(),this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1))}}scheduleReconnect(){if(this.reconnectAttempts>=5)return n.A.error(`${h} Max reconnection attempts (5) reached`),void this.clientMsgSend(new s.Cj("Unable to connect to speech 
recognition service after 5 attempts"));this.reconnectAttempts++,n.A.info(`${h} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`),this.reconnectTimeoutId=setTimeout(()=>{this.connect()},this.reconnectDelay),this.reconnectDelay=Math.min(2*this.reconnectDelay,3e4)}resetReconnectionState(){this.reconnectAttempts=0,this.reconnectDelay=1e3,this.clearReconnectTimeout()}clearReconnectTimeout(){this.reconnectTimeoutId&&(clearTimeout(this.reconnectTimeoutId),this.reconnectTimeoutId=null)}async getToken(){const t=this.options.model||"nova-3",e=`${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(t)}`,i=await fetch(e,{method:"GET",headers:{Authorization:`Bearer ${this.options.jwtToken}`,"Content-Type":"application/json"}});if(!i.ok)throw new Error(`Token fetch failed: ${i.status} ${i.statusText}`);return await i.json()}async startMicrophone(){try{if(n.A.info(`${h} Starting microphone`),this.stopMicrophone(),!navigator.mediaDevices?.getUserMedia)throw new Error("Microphone access is not available in this context");if(this.stream=await navigator.mediaDevices.getUserMedia({audio:{deviceId:this.options.microphoneDeviceId?{exact:this.options.microphoneDeviceId}:void 0,echoCancellation:this.options.echoCancellation,noiseSuppression:this.options.noiseSuppression,autoGainControl:this.options.autoGainControl}}),this.state===d.Paused)return n.A.info(`${h} Paused during getUserMedia — keeping stream but disabling tracks`),void this.stream.getTracks().forEach(t=>{t.enabled=!1});this.microphone=new MediaRecorder(this.stream,{mimeType:"audio/webm;codecs=opus",audioBitsPerSecond:48e3}),this.microphone.ondataavailable=t=>{t.data.size>0&&this.connection&&this.state===d.Connected&&t.data.arrayBuffer().then(t=>{this.connection?.sendMedia(t)}).catch(t=>{n.A.error(`${h} Error converting audio data`,n.A.serialiseError(t))})},this.microphone.start(250),n.A.debug(`${h} Microphone started`),this.clientMsgSend(new 
s.WY(!0))}catch(t){n.A.error(`${h} Microphone error`,n.A.serialiseError(t)),this.clientMsgSend(new s.co(new Error(JSON.stringify(t))))}}stopMicrophone(){this.microphone&&"recording"===this.microphone.state&&(this.microphone.stop(),this.microphone=null),this.stream&&(this.stream.getTracks().forEach(t=>{t.stop()}),this.stream=null),n.A.debug(`${h} Microphone stopped`)}setupEventHandlers(){this.connection&&(this.connection.on("open",()=>{this.handleConnectionOpen()}),this.connection.on("message",t=>{if(null!==t&&"object"==typeof t&&"type"in t){const e=t;"Results"===e.type?this.handleTranscript(t):"UtteranceEnd"===e.type?this.handleUtteranceEnd(t):"Metadata"===e.type&&n.A.debug(`${h} Metadata`,t)}}),this.connection.on("close",()=>{this.handleConnectionClose()}),this.connection.on("error",t=>{this.handleError(t)}))}handleAppMessages(){this.options.messages.subscribe(t=>{switch(t.uneeqMessageType){case s.Yg.AvatarStartedSpeaking:this.digitalHumanSpeaking=!0;break;case s.Yg.PromptResult:t.promptResult.success||this.handleSpeakingEnd();break;case s.Yg.AvatarAnswer:""===t.answerSpeech.replace(/<[^>]*>/g,"")&&this.handleSpeakingEnd();break;case s.Yg.AvatarStoppedSpeaking:this.handleSpeakingEnd();break;case s.Yg.SessionEnded:this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.SessionReconnecting:this.handleSpeakingEnd(),this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.CustomMetadataUpdated:this.options.promptMetadata=t.chatMetadata;break;case s.Yg.SessionBackendError:this.handleSpeakingEnd()}})}handleTranscript(t){try{this.lastDeepgramEventTime=Date.now();const e=t.channel;if(!e?.alternatives||0===e.alternatives.length)return;const i=e.alternatives[0],s=String(i.transcript||""),a=t.is_final,o=t.speech_final;if(n.A.debug(`${h} Transcript event: is_final=${a}, speech_final=${o}, transcript="${s.substring(0,50)}${s.length>50?"...":""}", accumulated="${this.accumulatedTranscript.substring(0,30)}${this.accumulatedTranscript.length>30?"...":""}", 
uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`),""===s&&o)return void(this.endsWithPunctuation(this.accumulatedTranscript)?(this.sendAccumulatedTranscript(u.FinalTranscript),this.resetSpeakingStates()):n.A.debug(`${h} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`));if(""===s)return;this.processTranscriptChunk(s,i.confidence,a,o),this.updateSpeakingAndInterruptionState(s,o),this.emitTranscriptionResult(s,i.confidence,o),this.resetSafetyNetTimeout()}catch(t){n.A.error(`${h} Error processing transcript`,n.A.serialiseError(t))}}processTranscriptChunk(t,e,i,s){if(i&&!s){""!==this.accumulatedTranscript&&(this.accumulatedTranscript+=" "),this.accumulatedTranscript+=t;const i=t.trim().split(/\s+/).length,s=e??1;this.accumulatedConfidenceSum+=s*i,this.accumulatedWordCount+=i,n.A.debug(`${h} Accumulated transcript: "${this.accumulatedTranscript}"`)}}updateSpeakingAndInterruptionState(t,e){const i=(""!==this.accumulatedTranscript?this.accumulatedTranscript:t).trim().split(/\s+/).length,o=this.options.interruptionWordThreshold??3;this.isUiShowingSpeaking||(this.isUiShowingSpeaking=!0,this.clientMsgSend(new s._4));const c=!this.digitalHumanSpeaking||i>=o,u=""!==this.accumulatedTranscript?this.accumulatedTranscript+" "+t:t,d=e&&this.endsWithPunctuation(u);!this.isUserCurrentlySpeaking&&c&&(this.isUserCurrentlySpeaking=!0,this.dataChannelMsgSend(new a.A(a.f.Start))),this.digitalHumanSpeaking&&i>=o&&(n.A.debug(`${h} Interrupting digital human`),this.dataChannelMsgSend(new r.f),this.clientMsgSend(new s.tc),this.digitalHumanSpeaking=!1),this.isUserCurrentlySpeaking&&d&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new a.A(a.f.Stop))),this.isUiShowingSpeaking&&d&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}emitTranscriptionResult(t,e,i){const a=""!==this.accumulatedTranscript?this.accumulatedTranscript+" 
"+t:t,o=i&&this.endsWithPunctuation(a),r={transcript:i?a:t,final:o,confidence:e??1,language_code:this.options.language||""};if(this.clientMsgSend(new s.Ux(r)),o){const t=a.trim().split(/\s+/).length,e=this.options.interruptionWordThreshold??3;this.digitalHumanSpeaking&&t<e?n.A.debug(`${h} Discarding utterance during speaking (${t} words < ${e} threshold): "${a}"`):(n.A.info(`${h} Final transcript: "${a}"`),this.sendChatPrompt(a)),this.resetAccumulatedState()}else if(i){""!==this.accumulatedTranscript&&(this.accumulatedTranscript+=" "),this.accumulatedTranscript+=t;const i=t.trim().split(/\s+/).length;this.accumulatedConfidenceSum+=(e??1)*i,this.accumulatedWordCount+=i,n.A.debug(`${h} speech_final without punctuation, accumulated for safety net: "${this.accumulatedTranscript}"`)}}handleUtteranceEnd(t){this.lastDeepgramEventTime=Date.now(),n.A.debug(`${h} UtteranceEnd event received: last_word_end=${t?.last_word_end}, accumulated="${this.accumulatedTranscript.substring(0,50)}...", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`),""!==this.accumulatedTranscript.trim()?this.endsWithPunctuation(this.accumulatedTranscript)?(this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),n.A.debug(`${h} UtteranceEnd fallback triggered with transcript: "${this.accumulatedTranscript}"`),this.sendAccumulatedTranscript(u.UtteranceEndFallback),this.resetSpeakingStates(),n.A.debug(`${h} UtteranceEnd: reset speaking states`)):n.A.debug(`${h} UtteranceEnd: no punctuation, waiting for more speech or safety net`):(this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),this.resetSpeakingStates(),n.A.debug(`${h} UtteranceEnd: no transcript, reset speaking 
states`))}resetSafetyNetTimeout(){this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),(this.isUiShowingSpeaking||this.isUserCurrentlySpeaking)&&""!==this.accumulatedTranscript.trim()&&(this.safetyNetTimeoutId=setTimeout(()=>{this.triggerSafetyNet()},this.options.safetyNetTimeoutMs))}triggerSafetyNet(){n.A.warn(`${h} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`),n.A.debug(`${h} Safety net triggered: accumulated="${this.accumulatedTranscript}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, timeSinceLastEvent=${Date.now()-this.lastDeepgramEventTime}ms`),this.sendAccumulatedTranscript(u.SafetyNet),this.resetSpeakingStates(),this.safetyNetTimeoutId=null}sendAccumulatedTranscript(t){if(""===this.accumulatedTranscript.trim())return;if(t!==u.SafetyNet&&!this.endsWithPunctuation(this.accumulatedTranscript))return void n.A.debug(`${h} sendAccumulatedTranscript: not sending as source is ${t} and transcript doesn't end in punctuation: "${this.accumulatedTranscript}"`);const e=this.options.interruptionWordThreshold??3,i=this.accumulatedWordCount>0?this.accumulatedConfidenceSum/this.accumulatedWordCount:1,a={transcript:this.accumulatedTranscript,final:!0,confidence:i,language_code:this.options.language||""};this.clientMsgSend(new s.Ux(a));const o=this.accumulatedTranscript.trim().split(/\s+/).length;this.digitalHumanSpeaking&&o<e?n.A.debug(`${h} Discarding utterance during speaking (${o} words < ${e} threshold): "${this.accumulatedTranscript}"`):(n.A.info(`${h} ${t}: "${this.accumulatedTranscript}"`),this.sendChatPrompt(this.accumulatedTranscript)),this.resetAccumulatedState()}endsWithPunctuation(t){const e=t.trim();return 
0!==e.length&&/[.!?;:]$/.test(e)}resetAccumulatedState(){this.accumulatedTranscript="",this.accumulatedConfidenceSum=0,this.accumulatedWordCount=0}resetSpeakingStates(){this.isUserCurrentlySpeaking&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new a.A(a.f.Stop))),this.isUiShowingSpeaking&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}handleConnectionOpen(){this.state!==d.Paused&&(this.state=d.Connected)}handleConnectionClose(){if(n.A.info(`${h} Connection closed`),this.state===d.Paused)return n.A.info(`${h} Connection closed while paused — will reconnect on resume`),this.connection=null,void this.stopMicrophone();this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1)),this.shouldReconnect&&(n.A.info(`${h} Unexpected disconnect, attempting reconnection...`),this.scheduleReconnect())}handleError(t){n.A.error(`${h} Error occurred`,t);const e=t instanceof Error?t.message:String(t);e.includes("microphone")||e.includes("permission")||e.includes("getUserMedia")?this.clientMsgSend(new s.co(new Error(e))):this.clientMsgSend(new s.Cj(e))}sendChatPrompt(t){t&&""!==t.trim()&&(this.options.language&&(this.options.promptMetadata.userSpokenLocale=this.options.language),this.dataChannelMsgSend(new o.D(t,this.options.promptMetadata)))}handleSpeakingEnd(){this.digitalHumanSpeaking=!1}dataChannelMsgSend(t){this.options.sendMessage(t)}clientMsgSend(t){this.options.messages.next(t)}}}}]);
1
+ "use strict";(Object("undefined"!=typeof self?self:this).webpackChunkUneeq=Object("undefined"!=typeof self?self:this).webpackChunkUneeq||[]).push([[3],{3(t,e,i){i.d(e,{DeepgramSTT:()=>p});var n=i(514),s=i(838),a=i(33),o=i(388),r=i(58),c=i(260);const h="[Deepgram STT]";var u,d;!function(t){t.FinalTranscript="Final transcript (from accumulated)",t.UtteranceEndFallback="UtteranceEnd fallback",t.SafetyNet="Safety net"}(u||(u={})),function(t){t.Idle="Idle",t.Connecting="Connecting",t.Connected="Connected",t.Paused="Paused",t.Disconnected="Disconnected"}(d||(d={}));class p{options;connection=null;state=d.Idle;shouldReconnect=!0;microphone=null;stream=null;reconnectAttempts=0;reconnectDelay=1e3;reconnectTimeoutId=null;digitalHumanSpeaking=!1;isUserCurrentlySpeaking=!1;isUiShowingSpeaking=!1;accumulatedTranscript="";accumulatedConfidenceSum=0;accumulatedWordCount=0;lastDeepgramEventTime=0;safetyNetTimeoutId=null;constructor(t){this.options=t,this.options.model=this.options.model||"nova-3",this.options.language=this.options.language||"en",this.options.smartFormat=this.options.smartFormat??!0,this.options.interimResults=this.options.interimResults??!0,this.options.utteranceEndMs=this.options.utteranceEndMs??1500,this.options.vadEvents=this.options.vadEvents??!0,this.options.fillerWords=this.options.fillerWords??!1,this.options.endpointing=this.options.endpointing??500,this.options.echoCancellation=this.options.echoCancellation??!0,this.options.noiseSuppression=this.options.noiseSuppression??!0,this.options.autoGainControl=this.options.autoGainControl??!0,this.options.interruptionWordThreshold=this.options.interruptionWordThreshold??3,this.options.noDelay=this.options.noDelay??!1,this.options.safetyNetTimeoutMs=this.options.safetyNetTimeoutMs??2e3,this.options.safetyNetTimeoutMs<=500?(n.A.warn(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. 
Ignoring and default the value to 2000ms.`),this.options.safetyNetTimeoutMs=2e3):this.options.safetyNetTimeoutMs<=1e3?n.A.warn(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is 2000ms.`):n.A.info(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms.`),this.handleAppMessages()}async startRecognition(){n.A.info(`${h} Starting speech recognition`),this.shouldReconnect=!0,this.resetReconnectionState(),await this.connect()}async stopRecognition(){n.A.info(`${h} Stopping speech recognition`),this.shouldReconnect=!1,this.clearReconnectTimeout(),await this.disconnect()}async pause(){return n.A.info(`${h} Pausing speech recognition`),this.state=d.Paused,this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),this.resetAccumulatedState(),this.resetSpeakingStates(),this.stream&&(this.stream.getTracks().forEach(t=>{t.enabled=!1}),n.A.debug(`${h} Audio tracks disabled`)),!0}async resume(){if(n.A.info(`${h} Resuming speech recognition`),this.state===d.Paused){if(this.stream)return this.state=d.Connected,this.stream.getTracks().forEach(t=>{t.enabled=!0}),n.A.debug(`${h} Audio tracks re-enabled`),!0;if(this.connection)return this.state=d.Connected,await this.startMicrophone(),!0;this.state=d.Disconnected}return n.A.debug(`${h} Initiating connection`),await this.connect(),!0}setChatMetadata(t){this.options.promptMetadata=t}async connect(){if(this.state!==d.Connected)if(this.state!==d.Connecting){this.state=d.Connecting;try{const t=await this.getToken();n.A.info(`${h} Connecting to Deepgram`);const e=new 
c.c({accessToken:t.token,baseUrl:t.api_url}),i={model:this.options.model,language:this.options.language,smart_format:String(this.options.smartFormat),interim_results:String(this.options.interimResults),utterance_end_ms:this.options.utteranceEndMs,vad_events:String(this.options.vadEvents),filler_words:String(this.options.fillerWords),endpointing:this.options.endpointing,mip_opt_out:"true",...this.options.keyterms&&this.options.keyterms.length>0&&{keyterm:this.options.keyterms},...this.options.noDelay&&{queryParams:{no_delay:"true"}}},s=e.listen;if(this.connection=await s.v1.connect(i),this.connection.connect(),await Promise.race([this.connection.waitForOpen(),new Promise((t,e)=>setTimeout(()=>e(new Error("Connection timeout")),1e4))]),this.state!==d.Paused&&(this.state=d.Connected),n.A.info(`${h} Connection opened`),this.setupEventHandlers(),this.state===d.Paused)return n.A.info(`${h} Pause requested during connection — staying paused`),void this.resetReconnectionState();await this.startMicrophone(),n.A.info(`${h} Connected successfully`),this.resetReconnectionState()}catch(t){this.state=d.Disconnected,n.A.error(`${h} Connection error`,n.A.serialiseError(t)),this.shouldReconnect&&(this.emitTransientError(t),this.scheduleReconnect())}}else n.A.warn(`${h} Connection already in progress`);else n.A.warn(`${h} Already connected`)}async disconnect(){if(this.state!==d.Idle&&(this.state!==d.Disconnected||this.connection)){n.A.info(`${h} Disconnecting`),this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null);try{this.stopMicrophone(),this.connection&&(this.connection.close(),this.connection=null)}catch(t){n.A.error(`${h} Disconnect error`,n.A.serialiseError(t))}this.resetAccumulatedState(),this.resetSpeakingStates(),this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1))}}scheduleReconnect(){if(this.reconnectAttempts>=5)return n.A.error(`${h} Max reconnection attempts (5) reached`),void this.clientMsgSend(new s.Cj("Unable to connect 
to speech recognition service after 5 attempts"));this.reconnectAttempts++,n.A.info(`${h} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`),this.reconnectTimeoutId=setTimeout(()=>{this.connect()},this.reconnectDelay),this.reconnectDelay=Math.min(2*this.reconnectDelay,3e4)}resetReconnectionState(){this.reconnectAttempts=0,this.reconnectDelay=1e3,this.clearReconnectTimeout()}clearReconnectTimeout(){this.reconnectTimeoutId&&(clearTimeout(this.reconnectTimeoutId),this.reconnectTimeoutId=null)}async getToken(){const t=this.options.model||"nova-3",e=`${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(t)}`,i=await fetch(e,{method:"GET",headers:{Authorization:`Bearer ${this.options.jwtToken}`,"Content-Type":"application/json"}});if(!i.ok)throw new Error(`Token fetch failed: ${i.status} ${i.statusText}`);return await i.json()}async startMicrophone(){try{if(n.A.info(`${h} Starting microphone`),this.stopMicrophone(),!navigator.mediaDevices?.getUserMedia)throw new Error("Microphone access is not available in this context");if(this.stream=await navigator.mediaDevices.getUserMedia({audio:{deviceId:this.options.microphoneDeviceId?{exact:this.options.microphoneDeviceId}:void 0,echoCancellation:this.options.echoCancellation,noiseSuppression:this.options.noiseSuppression,autoGainControl:this.options.autoGainControl}}),this.state===d.Paused)return n.A.info(`${h} Paused during getUserMedia — keeping stream but disabling tracks`),void this.stream.getTracks().forEach(t=>{t.enabled=!1});this.microphone=new MediaRecorder(this.stream,{mimeType:"audio/webm;codecs=opus",audioBitsPerSecond:48e3}),this.microphone.ondataavailable=t=>{t.data.size>0&&this.connection&&this.state===d.Connected&&t.data.arrayBuffer().then(t=>{this.connection?.sendMedia(t)}).catch(t=>{n.A.error(`${h} Error converting audio data`,n.A.serialiseError(t))})},this.microphone.start(250),n.A.debug(`${h} Microphone 
started`),this.clientMsgSend(new s.WY(!0))}catch(t){n.A.error(`${h} Microphone error`,n.A.serialiseError(t)),this.clientMsgSend(new s.co(new Error(JSON.stringify(t))))}}stopMicrophone(){this.microphone&&"recording"===this.microphone.state&&(this.microphone.stop(),this.microphone=null),this.stream&&(this.stream.getTracks().forEach(t=>{t.stop()}),this.stream=null),n.A.debug(`${h} Microphone stopped`)}setupEventHandlers(){this.connection&&(this.connection.on("open",()=>{this.handleConnectionOpen()}),this.connection.on("message",t=>{if(null!==t&&"object"==typeof t&&"type"in t){const e=t;"Results"===e.type?this.handleTranscript(t):"UtteranceEnd"===e.type?this.handleUtteranceEnd(t):"Metadata"===e.type&&n.A.debug(`${h} Metadata`,t)}}),this.connection.on("close",()=>{this.handleConnectionClose()}),this.connection.on("error",t=>{n.A.error(`${h} WebSocket error event`,n.A.serialiseError(t)),this.emitTransientError(t)}))}handleAppMessages(){this.options.messages.subscribe(t=>{switch(t.uneeqMessageType){case s.Yg.AvatarStartedSpeaking:this.digitalHumanSpeaking=!0;break;case s.Yg.PromptResult:t.promptResult.success||this.handleSpeakingEnd();break;case s.Yg.AvatarAnswer:""===t.answerSpeech.replace(/<[^>]*>/g,"")&&this.handleSpeakingEnd();break;case s.Yg.AvatarStoppedSpeaking:this.handleSpeakingEnd();break;case s.Yg.SessionEnded:this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.SessionReconnecting:this.handleSpeakingEnd(),this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.CustomMetadataUpdated:this.options.promptMetadata=t.chatMetadata;break;case s.Yg.SessionBackendError:this.handleSpeakingEnd()}})}handleTranscript(t){try{this.lastDeepgramEventTime=Date.now();const e=t.channel;if(!e?.alternatives||0===e.alternatives.length)return;const i=e.alternatives[0],s=String(i.transcript||""),a=t.is_final,o=t.speech_final;if(n.A.debug(`${h} Transcript event: is_final=${a}, speech_final=${o}, transcript="${s.substring(0,50)}${s.length>50?"...":""}", 
accumulated="${this.accumulatedTranscript.substring(0,30)}${this.accumulatedTranscript.length>30?"...":""}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`),""===s&&o)return void(this.endsWithPunctuation(this.accumulatedTranscript)?(this.sendAccumulatedTranscript(u.FinalTranscript),this.resetSpeakingStates()):n.A.debug(`${h} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`));if(""===s)return;this.processTranscriptChunk(s,i.confidence,a,o),this.updateSpeakingAndInterruptionState(s,o),this.emitTranscriptionResult(s,i.confidence,o),this.resetSafetyNetTimeout()}catch(t){n.A.error(`${h} Error processing transcript`,n.A.serialiseError(t))}}processTranscriptChunk(t,e,i,s){if(i&&!s){""!==this.accumulatedTranscript&&(this.accumulatedTranscript+=" "),this.accumulatedTranscript+=t;const i=t.trim().split(/\s+/).length,s=e??1;this.accumulatedConfidenceSum+=s*i,this.accumulatedWordCount+=i,n.A.debug(`${h} Accumulated transcript: "${this.accumulatedTranscript}"`)}}updateSpeakingAndInterruptionState(t,e){const i=(""!==this.accumulatedTranscript?this.accumulatedTranscript:t).trim().split(/\s+/).length,o=this.options.interruptionWordThreshold??3;this.isUiShowingSpeaking||(this.isUiShowingSpeaking=!0,this.clientMsgSend(new s._4));const c=!this.digitalHumanSpeaking||i>=o,u=""!==this.accumulatedTranscript?this.accumulatedTranscript+" "+t:t,d=e&&this.endsWithPunctuation(u);!this.isUserCurrentlySpeaking&&c&&(this.isUserCurrentlySpeaking=!0,this.dataChannelMsgSend(new a.A(a.f.Start))),this.digitalHumanSpeaking&&i>=o&&(n.A.debug(`${h} Interrupting digital human`),this.dataChannelMsgSend(new r.f),this.clientMsgSend(new s.tc),this.digitalHumanSpeaking=!1),this.isUserCurrentlySpeaking&&d&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new a.A(a.f.Stop))),this.isUiShowingSpeaking&&d&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}emitTranscriptionResult(t,e,i){const 
a=""!==this.accumulatedTranscript?this.accumulatedTranscript+" "+t:t,o=i&&this.endsWithPunctuation(a),r={transcript:i?a:t,final:o,confidence:e??1,language_code:this.options.language||""};if(this.clientMsgSend(new s.Ux(r)),o){const t=a.trim().split(/\s+/).length,e=this.options.interruptionWordThreshold??3;this.digitalHumanSpeaking&&t<e?n.A.debug(`${h} Discarding utterance during speaking (${t} words < ${e} threshold): "${a}"`):(n.A.info(`${h} Final transcript: "${a}"`),this.sendChatPrompt(a)),this.resetAccumulatedState()}else if(i){""!==this.accumulatedTranscript&&(this.accumulatedTranscript+=" "),this.accumulatedTranscript+=t;const i=t.trim().split(/\s+/).length;this.accumulatedConfidenceSum+=(e??1)*i,this.accumulatedWordCount+=i,n.A.debug(`${h} speech_final without punctuation, accumulated for safety net: "${this.accumulatedTranscript}"`)}}handleUtteranceEnd(t){this.lastDeepgramEventTime=Date.now(),n.A.debug(`${h} UtteranceEnd event received: last_word_end=${t?.last_word_end}, accumulated="${this.accumulatedTranscript.substring(0,50)}...", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`),""!==this.accumulatedTranscript.trim()?this.endsWithPunctuation(this.accumulatedTranscript)?(this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),n.A.debug(`${h} UtteranceEnd fallback triggered with transcript: "${this.accumulatedTranscript}"`),this.sendAccumulatedTranscript(u.UtteranceEndFallback),this.resetSpeakingStates(),n.A.debug(`${h} UtteranceEnd: reset speaking states`)):n.A.debug(`${h} UtteranceEnd: no punctuation, waiting for more speech or safety net`):(this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),this.resetSpeakingStates(),n.A.debug(`${h} UtteranceEnd: no transcript, reset speaking 
states`))}resetSafetyNetTimeout(){this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),(this.isUiShowingSpeaking||this.isUserCurrentlySpeaking)&&""!==this.accumulatedTranscript.trim()&&(this.safetyNetTimeoutId=setTimeout(()=>{this.triggerSafetyNet()},this.options.safetyNetTimeoutMs))}triggerSafetyNet(){n.A.warn(`${h} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`),n.A.debug(`${h} Safety net triggered: accumulated="${this.accumulatedTranscript}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, timeSinceLastEvent=${Date.now()-this.lastDeepgramEventTime}ms`),this.sendAccumulatedTranscript(u.SafetyNet),this.resetSpeakingStates(),this.safetyNetTimeoutId=null}sendAccumulatedTranscript(t){if(""===this.accumulatedTranscript.trim())return;if(t!==u.SafetyNet&&!this.endsWithPunctuation(this.accumulatedTranscript))return void n.A.debug(`${h} sendAccumulatedTranscript: not sending as source is ${t} and transcript doesn't end in punctuation: "${this.accumulatedTranscript}"`);const e=this.options.interruptionWordThreshold??3,i=this.accumulatedWordCount>0?this.accumulatedConfidenceSum/this.accumulatedWordCount:1,a={transcript:this.accumulatedTranscript,final:!0,confidence:i,language_code:this.options.language||""};this.clientMsgSend(new s.Ux(a));const o=this.accumulatedTranscript.trim().split(/\s+/).length;this.digitalHumanSpeaking&&o<e?n.A.debug(`${h} Discarding utterance during speaking (${o} words < ${e} threshold): "${this.accumulatedTranscript}"`):(n.A.info(`${h} ${t}: "${this.accumulatedTranscript}"`),this.sendChatPrompt(this.accumulatedTranscript)),this.resetAccumulatedState()}endsWithPunctuation(t){const e=t.trim();return 
0!==e.length&&/[.!?;:]$/.test(e)}resetAccumulatedState(){this.accumulatedTranscript="",this.accumulatedConfidenceSum=0,this.accumulatedWordCount=0}resetSpeakingStates(){this.isUserCurrentlySpeaking&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new a.A(a.f.Stop))),this.isUiShowingSpeaking&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}handleConnectionOpen(){this.state!==d.Paused&&(this.state=d.Connected)}handleConnectionClose(){if(n.A.info(`${h} Connection closed`),this.state===d.Paused)return n.A.info(`${h} Connection closed while paused — will reconnect on resume`),this.connection=null,void this.stopMicrophone();this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1)),this.shouldReconnect&&(n.A.info(`${h} Unexpected disconnect, attempting reconnection...`),this.scheduleReconnect())}sendChatPrompt(t){t&&""!==t.trim()&&(this.options.language&&(this.options.promptMetadata.userSpokenLocale=this.options.language),this.dataChannelMsgSend(new o.D(t,this.options.promptMetadata)))}handleSpeakingEnd(){this.digitalHumanSpeaking=!1}emitTransientError(t){const e=t instanceof Error?t.message:String(t);this.clientMsgSend(new s.fP(e))}dataChannelMsgSend(t){this.options.sendMessage(t)}clientMsgSend(t){this.options.messages.next(t)}}}}]);
2
2
  //# sourceMappingURL=3.index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"3.index.js","mappings":"oPA0CA,MAAMA,EAAa,iBAenB,IAAKC,EAOAC,GAPL,SAAKD,GACD,wDACA,+CACA,wBACH,CAJD,CAAKA,IAAAA,EAA2B,KAOhC,SAAKC,GACD,cACA,0BACA,wBACA,kBACA,6BACH,CAND,CAAKA,IAAAA,EAAQ,KAqHN,MAAMC,EAkCoBC,QAjCrBC,WAA4C,KAC5CC,MAAkBJ,EAASK,KAC3BC,iBAA2B,EAC3BC,WAAmC,KACnCC,OAA6B,KAG7BC,kBAA4B,EAC5BC,eA9IuB,IA+IvBC,mBAA4C,KAG5CC,sBAAgC,EAGhCC,yBAAmC,EAGnCC,qBAA+B,EAK/BC,sBAAgC,GAGhCC,yBAAmC,EACnCC,qBAA+B,EAG/BC,sBAAgC,EAChCC,mBAA4C,KAEpD,WAAAC,CAA6BlB,GAAA,KAAAA,QAAAA,EAEzBmB,KAAKnB,QAAQoB,MAAQD,KAAKnB,QAAQoB,OAAS,SAC3CD,KAAKnB,QAAQqB,SAAWF,KAAKnB,QAAQqB,UAAY,KACjDF,KAAKnB,QAAQsB,YAAcH,KAAKnB,QAAQsB,cAAe,EACvDH,KAAKnB,QAAQuB,eAAiBJ,KAAKnB,QAAQuB,iBAAkB,EAC7DJ,KAAKnB,QAAQwB,eAAiBL,KAAKnB,QAAQwB,gBAAkB,KAC7DL,KAAKnB,QAAQyB,UAAYN,KAAKnB,QAAQyB,YAAa,EACnDN,KAAKnB,QAAQ0B,YAAcP,KAAKnB,QAAQ0B,cAAe,EACvDP,KAAKnB,QAAQ2B,YAAcR,KAAKnB,QAAQ2B,aAAe,IAKvDR,KAAKnB,QAAQ4B,iBAAmBT,KAAKnB,QAAQ4B,mBAAoB,EACjET,KAAKnB,QAAQ6B,iBAAmBV,KAAKnB,QAAQ6B,mBAAoB,EACjEV,KAAKnB,QAAQ8B,gBAAkBX,KAAKnB,QAAQ8B,kBAAmB,EAG/DX,KAAKnB,QAAQ+B,0BAA4BZ,KAAKnB,QAAQ+B,2BAvH1B,EAwH5BZ,KAAKnB,QAAQgC,QAAUb,KAAKnB,QAAQgC,UAAW,EAC/Cb,KAAKnB,QAAQiC,mBAAqBd,KAAKnB,QAAQiC,oBAtLjB,IAwL1Bd,KAAKnB,QAAQiC,oBAAsB,KACnC,IAAOC,KAAK,GAAGtC,kCAA2CuB,KAAKnB,QAAQiC,uIACvEd,KAAKnB,QAAQiC,mBA1La,KA2LnBd,KAAKnB,QAAQiC,oBAAsB,IAC1C,IAAOC,KAAK,GAAGtC,kCAA2CuB,KAAKnB,QAAQiC,0HAEvE,IAAOE,KAAK,GAAGvC,kCAA2CuB,KAAKnB,QAAQiC,yBAG3Ed,KAAKiB,mBACT,CAGO,sBAAMC,GACT,IAAOF,KAAK,GAAGvC,iCACfuB,KAAKf,iBAAkB,EACvBe,KAAKmB,+BACCnB,KAAKoB,SACf,CAEO,qBAAMC,GACT,IAAOL,KAAK,GAAGvC,iCACfuB,KAAKf,iBAAkB,EACvBe,KAAKsB,8BACCtB,KAAKuB,YACf,CAEO,WAAMC,GAwBT,OAvBA,IAAOR,KAAK,GAAGvC,gCACfuB,KAAKjB,MAAQJ,EAAS8C,OAGlBzB,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAI9BE,KAAK2B,wBAGL3B,KAAK4B,sBAGD5B,KAAKb,SACLa,KAAKb,OAAO0C,YAAYC,QAASC,IAAYA,EAAMC,SAAU,IAC7D,IAAOC,MAAM,GAAGxD,6BAKb,CACX,CAEO,YAAMyD,GAGT,GAFA,IAAOlB,KAAK,GAAGvC,iCAEXuB,KAAKjB,QAAUJ,EAAS8C,OAAQ,CAChC,GAAIzB,KAAKb,OAKL,OAHAa,KAAKjB,MAAQJ,EAASwD,UACtBnC,KAAKb,OAAO0C,YAAYC
,QAASC,IAAYA,EAAMC,SAAU,IAC7D,IAAOC,MAAM,GAAGxD,8BACT,EAGX,GAAIuB,KAAKlB,WAGL,OAFAkB,KAAKjB,MAAQJ,EAASwD,gBAChBnC,KAAKoC,mBACJ,EAGXpC,KAAKjB,MAAQJ,EAAS0D,YAC1B,CAKA,OAFA,IAAOJ,MAAM,GAAGxD,iCACVuB,KAAKoB,WACJ,CACX,CAGO,eAAAkB,CAAgBC,GACnBvC,KAAKnB,QAAQ2D,eAAiBD,CAClC,CAGQ,aAAMnB,GACV,GAAIpB,KAAKjB,QAAUJ,EAASwD,UAK5B,GAAInC,KAAKjB,QAAUJ,EAAS8D,WAA5B,CAKAzC,KAAKjB,MAAQJ,EAAS8D,WAEtB,IACI,MAAMC,QAAkB1C,KAAK2C,WAC7B,IAAO3B,KAAK,GAAGvC,4BAIf,MAAMmE,EAAW,IAAI,IAAe,CAChCC,YAAaH,EAAUI,MACvBC,QAASL,EAAUM,UAMjBC,EAAoB,CACtBhD,MAAOD,KAAKnB,QAAQoB,MACpBC,SAAUF,KAAKnB,QAAQqB,SACvBgD,aAAcC,OAAOnD,KAAKnB,QAAQsB,aAClCiD,gBAAiBD,OAAOnD,KAAKnB,QAAQuB,gBACrCiD,iBAAkBrD,KAAKnB,QAAQwB,eAC/BiD,WAAYH,OAAOnD,KAAKnB,QAAQyB,WAChCiD,aAAcJ,OAAOnD,KAAKnB,QAAQ0B,aAClCC,YAAaR,KAAKnB,QAAQ2B,YAG1BgD,YAAa,UAGTxD,KAAKnB,QAAQ4E,UAAYzD,KAAKnB,QAAQ4E,SAASC,OAAS,GAAK,CAAEC,QAAS3D,KAAKnB,QAAQ4E,aAErFzD,KAAKnB,QAAQgC,SAAW,CAAE+C,YAAa,CAAEC,SAAU,UAMrDC,EAAWlB,EAASmB,OA2B1B,GA1BA/D,KAAKlB,iBAAmBgF,EAASE,GAAG5C,QAAQ6B,GAK5CjD,KAAKlB,WAAWsC,gBACV6C,QAAQC,KAAK,CACflE,KAAKlB,WAAWqF,cAChB,IAAIF,QAAc,CAACG,EAAGC,IAClBC,WAAW,IAAMD,EAAO,IAAIE,MAAM,uBArVxB,QA2VbvE,KAAKjB,QAAuBJ,EAAS8C,SACtCzB,KAAKjB,MAAQJ,EAASwD,WAE1B,IAAOnB,KAAK,GAAGvC,uBAGfuB,KAAKwE,qBAKAxE,KAAKjB,QAAuBJ,EAAS8C,OAGtC,OAFA,IAAOT,KAAK,GAAGvC,6DACfuB,KAAKmB,+BAKHnB,KAAKoC,kBAEX,IAAOpB,KAAK,GAAGvC,4BAGfuB,KAAKmB,wBACT,CAAE,MAAOsD,GACLzE,KAAKjB,MAAQJ,EAAS0D,aACtB,IAAOoC,MAAM,GAAGhG,qBAA+B,IAAOiG,eAAeD,IACrEzE,KAAK2E,YAAYF,GAGbzE,KAAKf,iBACLe,KAAK4E,mBAEb,CAzFA,MAFI,IAAO7D,KAAK,GAAGtC,yCALf,IAAOsC,KAAK,GAAGtC,sBAiGvB,CAEQ,gBAAM8C,GACV,GAAIvB,KAAKjB,QAAUJ,EAASK,OAASgB,KAAKjB,QAAUJ,EAAS0D,cAAiBrC,KAAKlB,YAAnF,CAIA,IAAOkC,KAAK,GAAGvC,mBAGXuB,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAG9B,IACIE,KAAK6E,iBAED7E,KAAKlB,aACLkB,KAAKlB,WAAWgG,QAChB9E,KAAKlB,WAAa,KAE1B,CAAE,MAAO2F,GACL,IAAOA,MAAM,GAAGhG,qBAA+B,IAAOiG,eAAeD,GACzE,CAGAzE,KAAK2B,wBACL3B,KAAK4B,sBAEL5B,KAAKjB,MAAQJ,EAAS0D,aACtBrC,KAAK+E,cAAc,IAAI,MAA+B,GA1BtD,CA2BJ,CAEQ,iBAAAH,GACJ,GAAI5E,KAAKZ,m
BAxZc,EA6ZnB,OAJA,IAAOqF,MAAM,GAAGhG,gDAChBuB,KAAK+E,cAAc,IAAI,KACnB,qEAKR/E,KAAKZ,oBACL,IAAO4B,KACH,GAAGvC,qCAA8CuB,KAAKZ,0BAChDY,KAAKX,oBAGfW,KAAKV,mBAAqBgF,WAAW,KAC5BtE,KAAKoB,WACXpB,KAAKX,gBAGRW,KAAKX,eAAiB2F,KAAKC,IA5aE,EA6azBjF,KAAKX,eA9ac,IAib3B,CAEQ,sBAAA8B,GACJnB,KAAKZ,kBAAoB,EACzBY,KAAKX,eAtbsB,IAub3BW,KAAKsB,uBACT,CAEQ,qBAAAA,GACAtB,KAAKV,qBACLoC,aAAa1B,KAAKV,oBAClBU,KAAKV,mBAAqB,KAElC,CAEQ,cAAMqD,GAEV,MAAM1C,EAAQD,KAAKnB,QAAQoB,OAAS,SAC9BiF,EAAgB,GAAGlF,KAAKnB,QAAQsG,iEAAiEC,mBAAmBnF,KAEpHoF,QAAiBC,MAAMJ,EAAe,CACxCK,OAAQ,MACRC,QAAS,CACLC,cAAe,UAAUzF,KAAKnB,QAAQ6G,WACtC,eAAgB,sBAIxB,IAAKL,EAASM,GACV,MAAM,IAAIpB,MAAM,uBAAuBc,EAASO,UAAUP,EAASQ,cAGvE,aAAaR,EAASS,MAC1B,CAEQ,qBAAM1D,GACV,IAQI,GAPA,IAAOpB,KAAK,GAAGvC,yBAIfuB,KAAK6E,kBAGAkB,UAAUC,cAAcC,aACzB,MAAM,IAAI1B,MAAM,sDAYpB,GAVAvE,KAAKb,aAAe4G,UAAUC,aAAaC,aAAa,CACpDC,MAAO,CACHC,SAAUnG,KAAKnB,QAAQuH,mBAAqB,CAAEC,MAAOrG,KAAKnB,QAAQuH,yBAAuBE,EACzF7F,iBAAkBT,KAAKnB,QAAQ4B,iBAC/BC,iBAAkBV,KAAKnB,QAAQ6B,iBAC/BC,gBAAiBX,KAAKnB,QAAQ8B,mBAKjCX,KAAKjB,QAAuBJ,EAAS8C,OAGtC,OAFA,IAAOT,KAAK,GAAGvC,2EACfuB,KAAKb,OAAO0C,YAAYC,QAASC,IAAYA,EAAMC,SAAU,IAMjEhC,KAAKd,WAAa,IAAIqH,cAAcvG,KAAKb,OAAQ,CAC7CqH,SAAU,yBACVC,mBAxfc,OA2flBzG,KAAKd,WAAWwH,gBAAmBC,IAC3BA,EAAMC,KAAKC,KAAO,GAAK7G,KAAKlB,YAAckB,KAAKjB,QAAUJ,EAASwD,WAElEwE,EAAMC,KAAKE,cAAcC,KAAMD,IAC3B9G,KAAKlB,YAAYkI,UAAUF,KAC5BG,MAAOxC,IACN,IAAOA,MAAM,GAAGhG,gCAA0C,IAAOiG,eAAeD,OAM5FzE,KAAKd,WAAWgI,MAxgBL,KAygBX,IAAOjF,MAAM,GAAGxD,wBAGhBuB,KAAK+E,cAAc,IAAI,MAA+B,GAC1D,CAAE,MAAON,GACL,IAAOA,MAAM,GAAGhG,qBAA+B,IAAOiG,eAAeD,IACrEzE,KAAK+E,cAAc,IAAI,KAAmB,IAAIR,MAAM4C,KAAKC,UAAU3C,KACvE,CACJ,CAEQ,cAAAI,GACA7E,KAAKd,YAAwC,cAA1Bc,KAAKd,WAAWH,QACnCiB,KAAKd,WAAWmI,OAChBrH,KAAKd,WAAa,MAGlBc,KAAKb,SACLa,KAAKb,OAAO0C,YAAYC,QAASC,IAC7BA,EAAMsF,SAEVrH,KAAKb,OAAS,MAGlB,IAAO8C,MAAM,GAAGxD,uBACpB,CAEQ,kBAAA+F,GACCxE,KAAKlB,aAMVkB,KAAKlB,WAAWwI,GAAG,OAAQ,KACvBtH,KAAKuH,yBAKTvH,KAAKlB,WAAWwI,GAAG,UAAYV,IAC3B,GAAa,OAATA,GAAiC,iBAATA,GAAqB,SAAUA,EAAM,CAC7D,MAAMY,EAAQZ,EACK,YAAfY,EAAMC,KACNz
H,KAAK0H,iBAAiBd,GACA,iBAAfY,EAAMC,KAGbzH,KAAK2H,mBAAmBf,GACF,aAAfY,EAAMC,MACb,IAAOxF,MAAM,GAAGxD,aAAuBmI,EAE/C,IAGJ5G,KAAKlB,WAAWwI,GAAG,QAAS,KACxBtH,KAAK4H,0BAGT5H,KAAKlB,WAAWwI,GAAG,QAAU7C,IACzBzE,KAAK2E,YAAYF,KAEzB,CAEQ,iBAAAxD,GACJjB,KAAKnB,QAAQgJ,SAASC,UAAWC,IAC7B,OAAQA,EAAIC,kBACZ,KAAK,KAAiBC,sBAClBjI,KAAKT,sBAAuB,EAC5B,MAEJ,KAAK,KAAiB2I,aACUH,EACHI,aAAaC,SAElCpI,KAAKqI,oBAET,MAGJ,KAAK,KAAiBC,aAEkC,KADrCP,EACJQ,aAAaC,QAAQ,WAAY,KAExCxI,KAAKqI,oBAET,MAGJ,KAAK,KAAiBI,sBAClBzI,KAAKqI,oBACL,MAGJ,KAAK,KAAiBK,aAClB1I,KAAKf,iBAAkB,EAClBe,KAAKqB,kBACV,MAGJ,KAAK,KAAiBsH,oBAClB3I,KAAKqI,oBACLrI,KAAKf,iBAAkB,EAClBe,KAAKqB,kBACV,MAGJ,KAAK,KAAiBuH,sBAClB5I,KAAKnB,QAAQ2D,eAAkBuF,EAA8BxF,aAC7D,MAGJ,KAAK,KAAiBsG,oBAClB7I,KAAKqI,sBAOjB,CAOQ,gBAAAX,CAAiBd,GACrB,IACI5G,KAAKH,sBAAwBiJ,KAAKC,MAElC,MAAMC,EAAUpC,EAAKoC,QACrB,IAAKA,GAASC,cAAgD,IAAhCD,EAAQC,aAAavF,OAC/C,OAGJ,MAAMwF,EAAcF,EAAQC,aAAa,GACnCE,EAAqBhG,OAAO+F,EAAYC,YAAc,IACtDC,EAAUxC,EAAKyC,SACfC,EAAc1C,EAAK2C,aASzB,GANA,IAAOtH,MAAM,GAAGxD,gCAAyC2K,mBAAyBE,kBAC/DH,EAAWK,UAAU,EAAG,MAAML,EAAWzF,OAAS,GAAK,MAAQ,qBAC9D1D,KAAKN,sBAAsB8J,UAAU,EAAG,MAAMxJ,KAAKN,sBAAsBgE,OAAS,GAAK,MAAQ,mBACjG1D,KAAKP,qCAAqCO,KAAKR,2BAG9C,KAAf2J,GAAqBG,EAOrB,YANItJ,KAAKyJ,oBAAoBzJ,KAAKN,wBAC9BM,KAAK0J,0BAA0BhL,EAA4BiL,iBAC3D3J,KAAK4B,uBAEL,IAAOK,MAAM,GAAGxD,mGAKxB,GAAmB,KAAf0K,EACA,OAGJnJ,KAAK4J,uBAAuBT,EAAYD,EAAYW,WAAYT,EAASE,GACzEtJ,KAAK8J,mCAAmCX,EAAYG,GACpDtJ,KAAK+J,wBAAwBZ,EAAYD,EAAYW,WAAYP,GAEjEtJ,KAAKgK,uBACT,CAAE,MAAOvF,GACL,IAAOA,MAAM,GAAGhG,gCAA0C,IAAOiG,eAAeD,GACpF,CACJ,CASQ,sBAAAmF,CAAuBT,EAAoBU,EAAoBT,EAAkBE,GACrF,GAAIF,IAAYE,EAAa,CACU,KAA/BtJ,KAAKN,wBACLM,KAAKN,uBAAyB,KAElCM,KAAKN,uBAAyByJ,EAE9B,MAAMc,EAAiBd,EAAWe,OAAOC,MAAM,OAAOzG,OAChD0G,EAAkBP,GAAc,EACtC7J,KAAKL,0BAA4ByK,EAAkBH,EACnDjK,KAAKJ,sBAAwBqK,EAE7B,IAAOhI,MAAM,GAAGxD,8BAAuCuB,KAAKN,yBAChE,CACJ,CAQQ,kCAAAoK,CAAmCX,EAAoBG,GAC3D,MACMe,GADqD,KAA/BrK,KAAKN,sBAA+BM,KAAKN,sBAAwByJ,GACvDe,OAAOC,MAAM,OAAOzG,OACpD4G,EAAYtK,KAAKnB,QAAQ+B,2BA7oBH,EA+oBvBZ,KAAKP,sBACNO,KAAKP,qBAAsB,EAC3BO
,KAAK+E,cAAc,IAAI,OAG3B,MAAMwF,GAAqBvK,KAAKT,sBAAwB8K,GAAaC,EAE/DE,EAAgD,KAA/BxK,KAAKN,sBACtBM,KAAKN,sBAAwB,IAAMyJ,EACnCA,EACAsB,EAAuBnB,GAAetJ,KAAKyJ,oBAAoBe,IAEhExK,KAAKR,yBAA2B+K,IACjCvK,KAAKR,yBAA0B,EAC/BQ,KAAK0K,mBAAmB,IAAI,IAAa,IAAkBC,SAG3D3K,KAAKT,sBAAwB8K,GAAaC,IAC1C,IAAOrI,MAAM,GAAGxD,gCAChBuB,KAAK0K,mBAAmB,IAAI,KAC5B1K,KAAK+E,cAAc,IAAI,MACvB/E,KAAKT,sBAAuB,GAG5BS,KAAKR,yBAA2BiL,IAChCzK,KAAKR,yBAA0B,EAC/BQ,KAAK0K,mBAAmB,IAAI,IAAa,IAAkBE,QAG3D5K,KAAKP,qBAAuBgL,IAC5BzK,KAAKP,qBAAsB,EAC3BO,KAAK+E,cAAc,IAAI,MAE/B,CASQ,uBAAAgF,CAAwBZ,EAAoBU,EAAoBP,GACpE,MAAMkB,EAAgD,KAA/BxK,KAAKN,sBACtBM,KAAKN,sBAAwB,IAAMyJ,EACnCA,EACAsB,EAAuBnB,GAAetJ,KAAKyJ,oBAAoBe,GAE/DK,EAAoC,CACtC1B,WAAYG,EAAckB,EAAiBrB,EAC3C2B,MAAOL,EACPZ,WAAYA,GAAc,EAC1BkB,cAAe/K,KAAKnB,QAAQqB,UAAY,IAM5C,GAFAF,KAAK+E,cAAc,IAAI,KAA2B8F,IAE9CJ,EAAsB,CACtB,MAAMO,EAAgBR,EAAeN,OAAOC,MAAM,OAAOzG,OACnD4G,EAAYtK,KAAKnB,QAAQ+B,2BA3sBP,EA4sBpBZ,KAAKT,sBAAwByL,EAAgBV,EAC7C,IAAOrI,MAAM,GAAGxD,2CAAoDuM,aAAyBV,kBAA0BE,OAEvH,IAAOxJ,KAAK,GAAGvC,wBAAiC+L,MAChDxK,KAAKiL,eAAeT,IAExBxK,KAAK2B,uBACT,MAAO,GAAI2H,EAAa,CACe,KAA/BtJ,KAAKN,wBACLM,KAAKN,uBAAyB,KAElCM,KAAKN,uBAAyByJ,EAE9B,MAAMc,EAAiBd,EAAWe,OAAOC,MAAM,OAAOzG,OACtD1D,KAAKL,2BAA6BkK,GAAc,GAAOI,EACvDjK,KAAKJ,sBAAwBqK,EAE7B,IAAOhI,MAAM,GAAGxD,oEAA6EuB,KAAKN,yBACtG,CACJ,CAOQ,kBAAAiI,CAAmBf,GACvB5G,KAAKH,sBAAwBiJ,KAAKC,MAElC,IAAO9G,MAAM,GAAGxD,gDAAyDmI,GAAMsE,+BAC3DlL,KAAKN,sBAAsB8J,UAAU,EAAG,uBAC1CxJ,KAAKP,qCAAqCO,KAAKR,2BAGvB,KAAtCQ,KAAKN,sBAAsBwK,OACvBlK,KAAKyJ,oBAAoBzJ,KAAKN,wBAG1BM,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAE9B,IAAOmC,MAAM,GAAGxD,uDAAgEuB,KAAKN,0BACrFM,KAAK0J,0BAA0BhL,EAA4ByM,sBAC3DnL,KAAK4B,sBACL,IAAOK,MAAM,GAAGxD,0CAKhB,IAAOwD,MAAM,GAAGxD,0EAKhBuB,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAE9BE,KAAK4B,sBACL,IAAOK,MAAM,GAAGxD,wDAExB,CAMQ,qBAAAuL,GAEAhK,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,OAIzBE,KAAKP,qBAAuBO,KAAKR,0BAAkE,KAAtCQ,KAAKN,sBAAsBwK,SACzFlK,KAAKF,mBAAqBwE,WAAW,KACjCtE,KAAKoL,oBACNpL,KAAKnB,QAAQiC,oBAExB,CAMQ,gBAAA
sK,GACJ,IAAOrK,KAAK,GAAGtC,kDAA2DuB,KAAKnB,QAAQiC,uCACvF,IAAOmB,MAAM,GAAGxD,wCAAiDuB,KAAKN,sCACpDM,KAAKP,qCAAqCO,KAAKR,+CACvCsJ,KAAKC,MAAQ/I,KAAKH,2BAE5CG,KAAK0J,0BAA0BhL,EAA4B2M,WAC3DrL,KAAK4B,sBACL5B,KAAKF,mBAAqB,IAC9B,CAOQ,yBAAA4J,CAA0B4B,GAC9B,GAA0C,KAAtCtL,KAAKN,sBAAsBwK,OAC3B,OAGJ,GAAIoB,IAAW5M,EAA4B2M,YAAcrL,KAAKyJ,oBAAoBzJ,KAAKN,uBAEnF,YADA,IAAOuC,MAAM,GAAGxD,yDAAkE6M,iDAAsDtL,KAAKN,0BAIjJ,MAAM4K,EAAYtK,KAAKnB,QAAQ+B,2BA7zBH,EAg0BtB2K,EAAgBvL,KAAKJ,qBAAuB,EAC5CI,KAAKL,yBAA2BK,KAAKJ,qBACrC,EAGAiL,EAAoC,CACtC1B,WAAYnJ,KAAKN,sBACjBoL,OAAO,EACPjB,WAAY0B,EACZR,cAAe/K,KAAKnB,QAAQqB,UAAY,IAE5CF,KAAK+E,cAAc,IAAI,KAA2B8F,IAGlD,MAAMR,EAAYrK,KAAKN,sBAAsBwK,OAAOC,MAAM,OAAOzG,OAC7D1D,KAAKT,sBAAwB8K,EAAYC,EACzC,IAAOrI,MAAM,GAAGxD,2CAAoD4L,aAAqBC,kBAA0BtK,KAAKN,2BAExH,IAAOsB,KAAK,GAAGvC,KAAc6M,OAAYtL,KAAKN,0BAC9CM,KAAKiL,eAAejL,KAAKN,wBAI7BM,KAAK2B,uBACT,CAOQ,mBAAA8H,CAAoB+B,GACxB,MAAMC,EAAUD,EAAKtB,OACrB,OAAuB,IAAnBuB,EAAQ/H,QAIL,WAAWgI,KAAKD,EAC3B,CAKQ,qBAAA9J,GACJ3B,KAAKN,sBAAwB,GAC7BM,KAAKL,yBAA2B,EAChCK,KAAKJ,qBAAuB,CAChC,CAKQ,mBAAAgC,GACA5B,KAAKR,0BACLQ,KAAKR,yBAA0B,EAC/BQ,KAAK0K,mBAAmB,IAAI,IAAa,IAAkBE,QAG3D5K,KAAKP,sBACLO,KAAKP,qBAAsB,EAC3BO,KAAK+E,cAAc,IAAI,MAE/B,CAEQ,oBAAAwC,GAEAvH,KAAKjB,QAAUJ,EAAS8C,SACxBzB,KAAKjB,MAAQJ,EAASwD,UAE9B,CAEQ,qBAAAyF,GAMJ,GALA,IAAO5G,KAAK,GAAGvC,uBAKXuB,KAAKjB,QAAUJ,EAAS8C,OAIxB,OAHA,IAAOT,KAAK,GAAGvC,+DACfuB,KAAKlB,WAAa,UAClBkB,KAAK6E,iBAIT7E,KAAKjB,MAAQJ,EAAS0D,aACtBrC,KAAK+E,cAAc,IAAI,MAA+B,IAGlD/E,KAAKf,kBACL,IAAO+B,KAAK,GAAGvC,uDACfuB,KAAK4E,oBAEb,CAEQ,WAAAD,CAAYF,GAChB,IAAOA,MAAM,GAAGhG,mBAA6BgG,GAE7C,MAAMkH,EAAelH,aAAiBF,MAChCE,EAAMmH,QACNzI,OAAOsB,GAGTkH,EAAaE,SAAS,eAAiBF,EAAaE,SAAS,eAAiBF,EAAaE,SAAS,gBACpG7L,KAAK+E,cAAc,IAAI,KAAmB,IAAIR,MAAMoH,KAEpD3L,KAAK+E,cAAc,IAAI,KAAoB4G,GAEnD,CAEQ,cAAAV,CAAe9B,GACfA,GAAoC,KAAtBA,EAAWe,SAErBlK,KAAKnB,QAAQqB,WACbF,KAAKnB,QAAQ2D,eAAesJ,iBAAmB9L,KAAKnB,QAAQqB,UAGhEF,KAAK0K,mBAAmB,IAAI,IAAWvB,EAAYnJ,KAAKnB,QAAQ2D,iBAExE,CAEQ,iBAAA6F,GACJrI,KAAKT,sBAAuB,CAChC,CAGQ,kBAAAmL,CAAmB3C,GACvB/H,KA
AKnB,QAAQkN,YAAYhE,EAC7B,CAGQ,aAAAhD,CAAcgD,GAClB/H,KAAKnB,QAAQgJ,SAASmE,KAAKjE,EAC/B,E","sources":["webpack://Uneeq/./src/deepgram-stt.ts"],"sourcesContent":["import { type Subject } from 'rxjs'\nimport Logger from './lib/logger'\nimport {\n UserStartedSpeakingMessage,\n UserStoppedSpeakingMessage,\n SpeechTranscriptionMessage,\n EnableMicrophoneUpdatedMessage,\n SessionErrorMessage,\n DeviceErrorMessage,\n AvatarInterruptedMessage,\n type UneeqMessage,\n UneeqMessageType,\n type PromptResultMessage,\n type AvatarAnswerMessage,\n type CustomMetadataUpdated,\n} from './types/UneeqMessages'\nimport { type SpeechTranscriptionResult } from './types/SpeechTranscriptionResult'\nimport { type DataChannelMessage } from './webrtc-data-channel/DataChannelMessage'\nimport { UserSpeaking, UserSpeakingState } from './webrtc-data-channel/messages/UserSpeaking'\nimport { ChatPrompt } from './webrtc-data-channel/messages/ChatPrompt'\nimport { StopSpeaking } from './webrtc-data-channel/messages/StopSpeaking'\nimport { type PromptMetadata } from './types/PromptMetadata'\nimport { DeepgramClient } from '@deepgram/sdk'\nimport { type SpeechRecognitionInterface } from './types/SpeechRecognitionInterface'\n\n// Local interface for the Deepgram v5 live connection — duck-typed to avoid coupling to SDK type names\ninterface DeepgramLiveConnection {\n on(event: string, handler: (...args: unknown[]) => void): void\n sendMedia(data: ArrayBuffer): void\n connect(): void\n waitForOpen(): Promise<unknown>\n close(): void\n}\n\n// v5 SDK exposes listen.v1.connect() but doesn't type it publicly — duck-typed local interface\ninterface DeepgramV1Listen {\n v1: {\n connect(options: Record<string, unknown>): Promise<DeepgramLiveConnection>\n }\n}\n\n// Constants\nconst LOG_PREFIX = '[Deepgram STT]'\nconst CONNECTION_TIMEOUT_MS = 10000\nconst AUDIO_CHUNK_MS = 250\nconst AUDIO_BITS_PER_SECOND = 48000\n\n// Reconnection constants\nconst INITIAL_RECONNECT_DELAY_MS = 1000\nconst MAX_RECONNECT_DELAY_MS = 
30000\nconst RECONNECT_BACKOFF_MULTIPLIER = 2\nconst MAX_RECONNECT_ATTEMPTS = 5\n\n// Safety net timeout - triggers if no Deepgram events received while in speaking state\nconst DEFAULT_SAFETY_NET_TIMEOUT_MS = 2000\n\n// Source of accumulated transcript submission\nenum AccumulatedTranscriptSource {\n FinalTranscript = 'Final transcript (from accumulated)',\n UtteranceEndFallback = 'UtteranceEnd fallback',\n SafetyNet = 'Safety net'\n}\n\n// STT Engine States\nenum STTState {\n Idle = 'Idle',\n Connecting = 'Connecting',\n Connected = 'Connected',\n Paused = 'Paused',\n Disconnected = 'Disconnected'\n}\n\n/**\n * Partial interface for Deepgram Live Transcription Response\n * See: https://developers.deepgram.com/reference/listen-live-websocket-messages\n */\ninterface LiveTranscriptionResponse {\n channel: {\n alternatives: Array<{\n transcript: string\n confidence: number\n words?: Array<{\n word: string\n start: number\n end: number\n confidence: number\n }>\n }>\n }\n is_final: boolean\n speech_final: boolean\n metadata: {\n request_id: string\n model_info: {\n name: string\n version: string\n arch: string\n }\n model_uuid: string\n }\n}\n\n/**\n * Interface for Deepgram UtteranceEnd event\n */\ninterface UtteranceEndResponse {\n type: 'UtteranceEnd'\n last_word_end: number\n channel: number[]\n}\n\n// Interruption threshold - minimum words needed to send a chat prompt while digital human is speaking\n// When the digital human is NOT speaking, all utterances are sent regardless of word count\n// Set to 1 for easy interruption, higher values (3-5) to require more intentional speech,\n// or very high (999) to effectively disable interruption\nconst INTERRUPTION_WORD_THRESHOLD = 3\n\nexport interface DeepgramSTTOptions {\n // Backend configuration\n connectionUrl: string\n jwtToken: string\n\n // Session information\n sessionId: string\n\n // Deepgram configuration\n model?: string\n language?: string\n smartFormat?: boolean\n interimResults?: boolean\n 
utteranceEndMs?: number\n vadEvents?: boolean\n encoding?: string\n sampleRate?: number\n channels?: number\n fillerWords?: boolean\n endpointing?: number\n\n // Interruption configuration - minimum words to send a prompt while digital human is speaking\n // Set to 1 for easy interruption, 3-5 for intentional phrases, 999 to disable interruption\n interruptionWordThreshold?: number\n\n /**\n * Keyterms to boost in transcription results.\n * Deepgram will give higher weight to these words/phrases during recognition.\n */\n keyterms?: string[]\n\n /**\n * Remove artificial delay from transcript delivery for lower latency.\n * @default false\n */\n noDelay?: boolean\n\n /**\n * Safety net timeout in milliseconds.\n * Triggers if no Deepgram events (transcripts/VAD) are received while the user is speaking.\n * This prevents the STT engine from getting \"stuck\" when Deepgram fails to detect end-of-speech.\n * Recommended: 3000ms - 10000ms. Defaults to 5000ms.\n */\n safetyNetTimeoutMs?: number\n\n // Microphone configuration\n echoCancellation?: boolean\n noiseSuppression?: boolean\n autoGainControl?: boolean\n microphoneDeviceId?: string\n\n // Metadata and callbacks\n promptMetadata: PromptMetadata\n messages: Subject<UneeqMessage>\n sendMessage: (msg: DataChannelMessage) => void\n}\n\ninterface DeepgramTokenResponse {\n token: string\n api_url: string\n sdk_version: string\n expires_at: string\n}\n\nexport class DeepgramSTT implements SpeechRecognitionInterface {\n private connection: DeepgramLiveConnection | null = null\n private state: STTState = STTState.Idle\n private shouldReconnect: boolean = true\n private microphone: MediaRecorder | null = null\n private stream: MediaStream | null = null\n\n // Reconnection state\n private reconnectAttempts: number = 0\n private reconnectDelay: number = INITIAL_RECONNECT_DELAY_MS\n private reconnectTimeoutId: NodeJS.Timeout | null = null\n\n // Digital human speaking state\n private digitalHumanSpeaking: boolean = false\n\n 
// User speaking state (for data channel messages to Renny - based on transcripts)\n private isUserCurrentlySpeaking: boolean = false\n\n // UI speaking state (for UI indicator - based on transcripts)\n private isUiShowingSpeaking: boolean = false\n\n // Accumulated transcript from is_final results within current utterance\n // Deepgram sends multiple is_final chunks for long utterances, which must be\n // concatenated until speech_final arrives to get the complete transcript\n private accumulatedTranscript: string = ''\n\n // Weighted average confidence: sum(confidence * words) / sum(words)\n private accumulatedConfidenceSum: number = 0\n private accumulatedWordCount: number = 0\n\n // Safety net timeout - tracks last Deepgram event to detect stuck states\n private lastDeepgramEventTime: number = 0\n private safetyNetTimeoutId: NodeJS.Timeout | null = null\n\n constructor(private readonly options: DeepgramSTTOptions) {\n // Apply defaults\n this.options.model = this.options.model || 'nova-3'\n this.options.language = this.options.language || 'en'\n this.options.smartFormat = this.options.smartFormat ?? true\n this.options.interimResults = this.options.interimResults ?? true\n this.options.utteranceEndMs = this.options.utteranceEndMs ?? 1500\n this.options.vadEvents = this.options.vadEvents ?? true\n this.options.fillerWords = this.options.fillerWords ?? false\n this.options.endpointing = this.options.endpointing ?? 500\n\n // Note: encoding, sampleRate, and channels are NOT set here\n // Let Deepgram auto-detect from the WebM/Opus container sent by MediaRecorder\n\n this.options.echoCancellation = this.options.echoCancellation ?? true\n this.options.noiseSuppression = this.options.noiseSuppression ?? true\n this.options.autoGainControl = this.options.autoGainControl ?? true\n\n // Interruption configuration\n this.options.interruptionWordThreshold = this.options.interruptionWordThreshold ?? 
INTERRUPTION_WORD_THRESHOLD\n this.options.noDelay = this.options.noDelay ?? false\n this.options.safetyNetTimeoutMs = this.options.safetyNetTimeoutMs ?? DEFAULT_SAFETY_NET_TIMEOUT_MS\n\n if (this.options.safetyNetTimeoutMs <= 500) {\n Logger.warn(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Ignoring and default the value to ${DEFAULT_SAFETY_NET_TIMEOUT_MS}ms.`)\n this.options.safetyNetTimeoutMs = DEFAULT_SAFETY_NET_TIMEOUT_MS\n } else if (this.options.safetyNetTimeoutMs <= 1000) {\n Logger.warn(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is ${DEFAULT_SAFETY_NET_TIMEOUT_MS}ms.`)\n } else {\n Logger.info(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms.`)\n }\n\n this.handleAppMessages()\n }\n\n // Main lifecycle methods\n public async startRecognition(): Promise<void> {\n Logger.info(`${LOG_PREFIX} Starting speech recognition`)\n this.shouldReconnect = true\n this.resetReconnectionState()\n await this.connect()\n }\n\n public async stopRecognition(): Promise<void> {\n Logger.info(`${LOG_PREFIX} Stopping speech recognition`)\n this.shouldReconnect = false\n this.clearReconnectTimeout()\n await this.disconnect()\n }\n\n public async pause(): Promise<boolean> {\n Logger.info(`${LOG_PREFIX} Pausing speech recognition`)\n this.state = STTState.Paused\n\n // Clear safety net timeout\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n\n // Reset accumulated transcript - any partial speech is discarded on pause\n this.resetAccumulatedState()\n\n // Reset speaking states\n this.resetSpeakingStates()\n\n // Disable audio tracks to stop sending audio, but keep microphone and connection alive\n if (this.stream) {\n this.stream.getTracks().forEach((track) => { 
track.enabled = false })\n Logger.debug(`${LOG_PREFIX} Audio tracks disabled`)\n }\n\n // Note: We don't send EnableMicrophoneUpdatedMessage(false) because the microphone\n // should remain visually enabled in the UI even when paused\n return true\n }\n\n public async resume(): Promise<boolean> {\n Logger.info(`${LOG_PREFIX} Resuming speech recognition`)\n\n if (this.state === STTState.Paused) {\n if (this.stream) {\n // Re-enable existing audio tracks (resume from normal pause)\n this.state = STTState.Connected\n this.stream.getTracks().forEach((track) => { track.enabled = true })\n Logger.debug(`${LOG_PREFIX} Audio tracks re-enabled`)\n return true\n }\n // Connection exists but no stream (paused during connect) — start microphone\n if (this.connection) {\n this.state = STTState.Connected\n await this.startMicrophone()\n return true\n }\n // No connection and no stream — reset state so connect() doesn't bail out\n this.state = STTState.Disconnected\n }\n\n // No connection — need full connect\n Logger.debug(`${LOG_PREFIX} Initiating connection`)\n await this.connect()\n return true\n }\n\n // Metadata management\n public setChatMetadata(chatMetadata: PromptMetadata): void {\n this.options.promptMetadata = chatMetadata\n }\n\n // Private methods\n private async connect(): Promise<void> {\n if (this.state === STTState.Connected) {\n Logger.warn(`${LOG_PREFIX} Already connected`)\n return\n }\n\n if (this.state === STTState.Connecting) {\n Logger.warn(`${LOG_PREFIX} Connection already in progress`)\n return\n }\n\n this.state = STTState.Connecting\n\n try {\n const tokenData = await this.getToken()\n Logger.info(`${LOG_PREFIX} Connecting to Deepgram`)\n\n // CRITICAL: Must use { accessToken: token } format for temporary tokens\n // Passing raw string treats it as API key with wrong authorization scheme\n const deepgram = new DeepgramClient({\n accessToken: tokenData.token,\n baseUrl: tokenData.api_url\n })\n\n // Configure the live transcription connection\n // 
Don't specify encoding/sample_rate/channels - let Deepgram auto-detect from WebM/Opus\n // Note: v5 SDK requires booleans to be passed as strings (\"true\"/\"false\")\n const connectionOptions = {\n model: this.options.model,\n language: this.options.language,\n smart_format: String(this.options.smartFormat),\n interim_results: String(this.options.interimResults),\n utterance_end_ms: this.options.utteranceEndMs,\n vad_events: String(this.options.vadEvents),\n filler_words: String(this.options.fillerWords),\n endpointing: this.options.endpointing,\n // Always opt out of Deepgram's Model Improvement Program to prevent\n // customer audio data from being used for model training\n mip_opt_out: 'true',\n // Optional features — only included when explicitly enabled\n // keyterm (singular) is the v5 SDK parameter name for vocabulary boosting\n ...(this.options.keyterms && this.options.keyterms.length > 0 && { keyterm: this.options.keyterms }),\n // no_delay is not a named param in the v5 SDK, pass via queryParams\n ...(this.options.noDelay && { queryParams: { no_delay: 'true' } })\n }\n\n // v5: listen.v1.connect() returns a V1Socket wrapping a ReconnectingWebSocket.\n // The v5 SDK doesn't expose .v1 in its public TypeScript types — cast to the\n // local DeepgramV1Listen interface defined above.\n const v1Listen = deepgram.listen as unknown as DeepgramV1Listen\n this.connection = await v1Listen.v1.connect(connectionOptions)\n\n // Initiate the WebSocket and wait for it to open (with timeout).\n // V1Socket.on() stores only one handler per event (last wins), so no off() is needed —\n // setupEventHandlers() below will overwrite these one-time handlers.\n this.connection.connect()\n await Promise.race([\n this.connection.waitForOpen(),\n new Promise<void>((_, reject) =>\n setTimeout(() => reject(new Error('Connection timeout')), CONNECTION_TIMEOUT_MS)\n )\n ])\n\n // Don't overwrite Paused state (user may have paused during async connection).\n // TypeScript narrows 
this.state to Connecting after the await, so cast to bypass.\n if ((this.state as STTState) !== STTState.Paused) {\n this.state = STTState.Connected\n }\n Logger.info(`${LOG_PREFIX} Connection opened`)\n\n // Now set up the persistent event handlers\n this.setupEventHandlers()\n\n // If user paused during async connection, stay paused — don't start microphone\n // Note: pause() can be called externally during the await, mutating this.state\n // to Paused. TypeScript narrows this.state to Connecting, so we cast to bypass.\n if ((this.state as STTState) === STTState.Paused) {\n Logger.info(`${LOG_PREFIX} Pause requested during connection — staying paused`)\n this.resetReconnectionState()\n return\n }\n\n // Start the microphone\n await this.startMicrophone()\n\n Logger.info(`${LOG_PREFIX} Connected successfully`)\n\n // Reset reconnection state on successful connection\n this.resetReconnectionState()\n } catch (error) {\n this.state = STTState.Disconnected\n Logger.error(`${LOG_PREFIX} Connection error`, Logger.serialiseError(error))\n this.handleError(error)\n\n // Attempt reconnection with exponential backoff\n if (this.shouldReconnect) {\n this.scheduleReconnect()\n }\n }\n }\n\n private async disconnect(): Promise<void> {\n if (this.state === STTState.Idle || (this.state === STTState.Disconnected && !this.connection)) {\n return\n }\n\n Logger.info(`${LOG_PREFIX} Disconnecting`)\n\n // Clear safety net timeout\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n\n try {\n this.stopMicrophone()\n\n if (this.connection) {\n this.connection.close()\n this.connection = null\n }\n } catch (error) {\n Logger.error(`${LOG_PREFIX} Disconnect error`, Logger.serialiseError(error))\n }\n\n // Reset accumulated transcript and speaking states\n this.resetAccumulatedState()\n this.resetSpeakingStates()\n\n this.state = STTState.Disconnected\n this.clientMsgSend(new EnableMicrophoneUpdatedMessage(false))\n }\n\n private 
scheduleReconnect(): void {\n if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {\n Logger.error(`${LOG_PREFIX} Max reconnection attempts (${MAX_RECONNECT_ATTEMPTS}) reached`)\n this.clientMsgSend(new SessionErrorMessage(\n `Unable to connect to speech recognition service after ${MAX_RECONNECT_ATTEMPTS} attempts`\n ))\n return\n }\n\n this.reconnectAttempts++\n Logger.info(\n `${LOG_PREFIX} Scheduling reconnection attempt ${this.reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS} ` +\n `in ${this.reconnectDelay}ms`\n )\n\n this.reconnectTimeoutId = setTimeout(() => {\n void this.connect()\n }, this.reconnectDelay)\n\n // Exponential backoff\n this.reconnectDelay = Math.min(\n this.reconnectDelay * RECONNECT_BACKOFF_MULTIPLIER,\n MAX_RECONNECT_DELAY_MS\n )\n }\n\n private resetReconnectionState(): void {\n this.reconnectAttempts = 0\n this.reconnectDelay = INITIAL_RECONNECT_DELAY_MS\n this.clearReconnectTimeout()\n }\n\n private clearReconnectTimeout(): void {\n if (this.reconnectTimeoutId) {\n clearTimeout(this.reconnectTimeoutId)\n this.reconnectTimeoutId = null\n }\n }\n\n private async getToken(): Promise<DeepgramTokenResponse> {\n // Construct token endpoint from connectionUrl, include model for API version selection\n const model = this.options.model || 'nova-3'\n const tokenEndpoint = `${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(model)}`\n\n const response = await fetch(tokenEndpoint, {\n method: 'GET',\n headers: {\n Authorization: `Bearer ${this.options.jwtToken}`,\n 'Content-Type': 'application/json'\n }\n })\n\n if (!response.ok) {\n throw new Error(`Token fetch failed: ${response.status} ${response.statusText}`)\n }\n\n return await response.json()\n }\n\n private async startMicrophone(): Promise<void> {\n try {\n Logger.info(`${LOG_PREFIX} Starting microphone`)\n\n // Stop any existing microphone/stream first to prevent orphaned MediaRecorders\n // that would send interleaved audio data to the same 
Deepgram connection\n this.stopMicrophone()\n\n // Get user media - let browser use defaults for sample rate/channels with Opus\n if (!navigator.mediaDevices?.getUserMedia) {\n throw new Error('Microphone access is not available in this context')\n }\n this.stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n deviceId: this.options.microphoneDeviceId ? { exact: this.options.microphoneDeviceId } : undefined,\n echoCancellation: this.options.echoCancellation,\n noiseSuppression: this.options.noiseSuppression,\n autoGainControl: this.options.autoGainControl\n }\n })\n\n // Check if user paused during the getUserMedia await\n if ((this.state as STTState) === STTState.Paused) {\n Logger.info(`${LOG_PREFIX} Paused during getUserMedia — keeping stream but disabling tracks`)\n this.stream.getTracks().forEach((track) => { track.enabled = false })\n return\n }\n\n // Create MediaRecorder to capture audio with Opus codec for Deepgram\n // Deepgram requires Opus codec in WebM container\n this.microphone = new MediaRecorder(this.stream, {\n mimeType: 'audio/webm;codecs=opus',\n audioBitsPerSecond: AUDIO_BITS_PER_SECOND\n })\n\n this.microphone.ondataavailable = (event: BlobEvent) => {\n if (event.data.size > 0 && this.connection && this.state === STTState.Connected) {\n // Convert blob to ArrayBuffer and send to Deepgram\n event.data.arrayBuffer().then((arrayBuffer) => {\n this.connection?.sendMedia(arrayBuffer)\n }).catch((error) => {\n Logger.error(`${LOG_PREFIX} Error converting audio data`, Logger.serialiseError(error))\n })\n }\n }\n\n // Start recording in chunks\n this.microphone.start(AUDIO_CHUNK_MS)\n Logger.debug(`${LOG_PREFIX} Microphone started`)\n\n // Notify that microphone is enabled (but NOT that user is speaking yet)\n this.clientMsgSend(new EnableMicrophoneUpdatedMessage(true))\n } catch (error) {\n Logger.error(`${LOG_PREFIX} Microphone error`, Logger.serialiseError(error))\n this.clientMsgSend(new DeviceErrorMessage(new 
Error(JSON.stringify(error))))\n }\n }\n\n private stopMicrophone(): void {\n if (this.microphone && this.microphone.state === 'recording') {\n this.microphone.stop()\n this.microphone = null\n }\n\n if (this.stream) {\n this.stream.getTracks().forEach((track) => {\n track.stop()\n })\n this.stream = null\n }\n\n Logger.debug(`${LOG_PREFIX} Microphone stopped`)\n }\n\n private setupEventHandlers(): void {\n if (!this.connection) {\n return\n }\n\n // V1Socket.on() stores only one handler per event (last-write wins) and has no off().\n // Each call below replaces any earlier handler for that event, which is the intended behaviour.\n this.connection.on('open', () => {\n this.handleConnectionOpen()\n })\n\n // v5: all transcription messages are unified under a single 'message' event,\n // discriminated by data.type ('Results', 'UtteranceEnd', 'Metadata', etc.)\n this.connection.on('message', (data: unknown) => {\n if (data !== null && typeof data === 'object' && 'type' in data) {\n const typed = data as { type: string }\n if (typed.type === 'Results') {\n this.handleTranscript(data as unknown as LiveTranscriptionResponse)\n } else if (typed.type === 'UtteranceEnd') {\n // UtteranceEnd fires when Deepgram detects a gap in word timings.\n // Critical for noisy environments where speech_final may never fire.\n this.handleUtteranceEnd(data as unknown as UtteranceEndResponse)\n } else if (typed.type === 'Metadata') {\n Logger.debug(`${LOG_PREFIX} Metadata`, data)\n }\n }\n })\n\n this.connection.on('close', () => {\n this.handleConnectionClose()\n })\n\n this.connection.on('error', (error: unknown) => {\n this.handleError(error)\n })\n }\n\n private handleAppMessages(): void {\n this.options.messages.subscribe((msg) => {\n switch (msg.uneeqMessageType) {\n case UneeqMessageType.AvatarStartedSpeaking:\n this.digitalHumanSpeaking = true\n break\n\n case UneeqMessageType.PromptResult: {\n const promptResultMessage = msg as PromptResultMessage\n if 
(!promptResultMessage.promptResult.success) {\n // The prompt failed, the digital human is not speaking\n this.handleSpeakingEnd()\n }\n break\n }\n\n case UneeqMessageType.AvatarAnswer: {\n const answer = msg as AvatarAnswerMessage\n if (answer.answerSpeech.replace(/<[^>]*>/g, '') === '') {\n // The response contained nothing to speak\n this.handleSpeakingEnd()\n }\n break\n }\n\n case UneeqMessageType.AvatarStoppedSpeaking: {\n this.handleSpeakingEnd()\n break\n }\n\n case UneeqMessageType.SessionEnded: {\n this.shouldReconnect = false\n void this.stopRecognition()\n break\n }\n\n case UneeqMessageType.SessionReconnecting: {\n this.handleSpeakingEnd()\n this.shouldReconnect = false\n void this.stopRecognition()\n break\n }\n\n case UneeqMessageType.CustomMetadataUpdated: {\n this.options.promptMetadata = (msg as CustomMetadataUpdated).chatMetadata\n break\n }\n\n case UneeqMessageType.SessionBackendError: {\n this.handleSpeakingEnd()\n break\n }\n\n default:\n }\n })\n }\n\n /**\n * Primary handler for incoming Deepgram transcript events.\n * Extracts transcript data, processes chunks, and updates speaking states.\n * @param data - The live transcription response from Deepgram\n */\n private handleTranscript(data: LiveTranscriptionResponse): void {\n try {\n this.lastDeepgramEventTime = Date.now()\n\n const channel = data.channel\n if (!channel?.alternatives || channel.alternatives.length === 0) {\n return\n }\n\n const alternative = channel.alternatives[0]\n const transcript: string = String(alternative.transcript || '')\n const isFinal = data.is_final\n const speechFinal = data.speech_final\n\n // Detailed debug logging for tracking transcript flow\n Logger.debug(`${LOG_PREFIX} Transcript event: is_final=${isFinal}, speech_final=${speechFinal}, ` +\n `transcript=\"${transcript.substring(0, 50)}${transcript.length > 50 ? '...' : ''}\", ` +\n `accumulated=\"${this.accumulatedTranscript.substring(0, 30)}${this.accumulatedTranscript.length > 30 ? '...' 
: ''}\", ` +\n `uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`)\n\n // Logic for empty transcript with speech_final\n if (transcript === '' && speechFinal) {\n if (this.endsWithPunctuation(this.accumulatedTranscript)) {\n this.sendAccumulatedTranscript(AccumulatedTranscriptSource.FinalTranscript)\n this.resetSpeakingStates()\n } else {\n Logger.debug(`${LOG_PREFIX} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`)\n }\n return\n }\n\n if (transcript === '') {\n return\n }\n\n this.processTranscriptChunk(transcript, alternative.confidence, isFinal, speechFinal)\n this.updateSpeakingAndInterruptionState(transcript, speechFinal)\n this.emitTranscriptionResult(transcript, alternative.confidence, speechFinal)\n\n this.resetSafetyNetTimeout()\n } catch (error) {\n Logger.error(`${LOG_PREFIX} Error processing transcript`, Logger.serialiseError(error))\n }\n }\n\n /**\n * Processes a single transcript chunk, accumulating text and confidence scores.\n * @param transcript - The transcript text from the current chunk\n * @param confidence - The confidence score for the current chunk\n * @param isFinal - Whether Deepgram has marked this chunk as final\n * @param speechFinal - Whether Deepgram has detected the end of a speech segment\n */\n private processTranscriptChunk(transcript: string, confidence: number, isFinal: boolean, speechFinal: boolean): void {\n if (isFinal && !speechFinal) {\n if (this.accumulatedTranscript !== '') {\n this.accumulatedTranscript += ' '\n }\n this.accumulatedTranscript += transcript\n\n const chunkWordCount = transcript.trim().split(/\\s+/).length\n const chunkConfidence = confidence ?? 
1.0\n this.accumulatedConfidenceSum += chunkConfidence * chunkWordCount\n this.accumulatedWordCount += chunkWordCount\n\n Logger.debug(`${LOG_PREFIX} Accumulated transcript: \"${this.accumulatedTranscript}\"`)\n }\n }\n\n /**\n * Evaluates whether the user's speech should trigger an interruption of the digital human\n * and updates the internal speaking states for Renny and the UI.\n * @param transcript - The current transcript text\n * @param speechFinal - Whether the current segment is marked as speech_final\n */\n private updateSpeakingAndInterruptionState(transcript: string, speechFinal: boolean): void {\n const effectiveTranscript = this.accumulatedTranscript !== '' ? this.accumulatedTranscript : transcript\n const wordCount = effectiveTranscript.trim().split(/\\s+/).length\n const threshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n\n if (!this.isUiShowingSpeaking) {\n this.isUiShowingSpeaking = true\n this.clientMsgSend(new UserStartedSpeakingMessage())\n }\n\n const willTriggerAction = !this.digitalHumanSpeaking || wordCount >= threshold\n\n const fullTranscript = this.accumulatedTranscript !== ''\n ? 
this.accumulatedTranscript + ' ' + transcript\n : transcript\n const speechLooksLikeFinal = speechFinal && this.endsWithPunctuation(fullTranscript)\n\n if (!this.isUserCurrentlySpeaking && willTriggerAction) {\n this.isUserCurrentlySpeaking = true\n this.dataChannelMsgSend(new UserSpeaking(UserSpeakingState.Start))\n }\n\n if (this.digitalHumanSpeaking && wordCount >= threshold) {\n Logger.debug(`${LOG_PREFIX} Interrupting digital human`)\n this.dataChannelMsgSend(new StopSpeaking())\n this.clientMsgSend(new AvatarInterruptedMessage())\n this.digitalHumanSpeaking = false\n }\n\n if (this.isUserCurrentlySpeaking && speechLooksLikeFinal) {\n this.isUserCurrentlySpeaking = false\n this.dataChannelMsgSend(new UserSpeaking(UserSpeakingState.Stop))\n }\n\n if (this.isUiShowingSpeaking && speechLooksLikeFinal) {\n this.isUiShowingSpeaking = false\n this.clientMsgSend(new UserStoppedSpeakingMessage())\n }\n }\n\n /**\n * Emits transcription results to the client (for closed captions) and sends\n * chat prompts to Renny if the utterance has reached a final state.\n * @param transcript - The current segment transcript\n * @param confidence - The confidence score for the current segment\n * @param speechFinal - Whether the segment is speech_final\n */\n private emitTranscriptionResult(transcript: string, confidence: number, speechFinal: boolean): void {\n const fullTranscript = this.accumulatedTranscript !== ''\n ? this.accumulatedTranscript + ' ' + transcript\n : transcript\n const speechLooksLikeFinal = speechFinal && this.endsWithPunctuation(fullTranscript)\n\n const result: SpeechTranscriptionResult = {\n transcript: speechFinal ? fullTranscript : transcript,\n final: speechLooksLikeFinal,\n confidence: confidence ?? 
1.0,\n language_code: this.options.language || ''\n }\n\n // Emit the transcription message (for closed captions)\n this.clientMsgSend(new SpeechTranscriptionMessage(result))\n\n if (speechLooksLikeFinal) {\n const fullWordCount = fullTranscript.trim().split(/\\s+/).length\n const threshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n if (this.digitalHumanSpeaking && fullWordCount < threshold) {\n Logger.debug(`${LOG_PREFIX} Discarding utterance during speaking (${fullWordCount} words < ${threshold} threshold): \"${fullTranscript}\"`)\n } else {\n Logger.info(`${LOG_PREFIX} Final transcript: \"${fullTranscript}\"`)\n this.sendChatPrompt(fullTranscript)\n }\n this.resetAccumulatedState()\n } else if (speechFinal) {\n if (this.accumulatedTranscript !== '') {\n this.accumulatedTranscript += ' '\n }\n this.accumulatedTranscript += transcript\n\n const chunkWordCount = transcript.trim().split(/\\s+/).length\n this.accumulatedConfidenceSum += (confidence ?? 1.0) * chunkWordCount\n this.accumulatedWordCount += chunkWordCount\n\n Logger.debug(`${LOG_PREFIX} speech_final without punctuation, accumulated for safety net: \"${this.accumulatedTranscript}\"`)\n }\n }\n\n /**\n * Handle UtteranceEnd event from Deepgram.\n * This fires when Deepgram detects a gap in word timings, even in noisy environments\n * where speech_final may never fire due to VAD detecting background noise as \"audio activity\".\n */\n private handleUtteranceEnd(data: UtteranceEndResponse): void {\n this.lastDeepgramEventTime = Date.now()\n\n Logger.debug(`${LOG_PREFIX} UtteranceEnd event received: last_word_end=${data?.last_word_end}, ` +\n `accumulated=\"${this.accumulatedTranscript.substring(0, 50)}...\", ` +\n `uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`)\n\n // If we have accumulated transcript that wasn't sent via speech_final, check if we should send it\n if (this.accumulatedTranscript.trim() !== '') {\n if 
(this.endsWithPunctuation(this.accumulatedTranscript)) {\n // Punctuation detected - user appears to be done speaking\n // Clear safety net and send the transcript\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n Logger.debug(`${LOG_PREFIX} UtteranceEnd fallback triggered with transcript: \"${this.accumulatedTranscript}\"`)\n this.sendAccumulatedTranscript(AccumulatedTranscriptSource.UtteranceEndFallback)\n this.resetSpeakingStates()\n Logger.debug(`${LOG_PREFIX} UtteranceEnd: reset speaking states`)\n } else {\n // No punctuation - user might still be mid-utterance (just pausing)\n // DON'T clear safety net - let it fire after timeout if no more speech arrives\n // DON'T reset speaking states - user is still mid-utterance\n Logger.debug(`${LOG_PREFIX} UtteranceEnd: no punctuation, waiting for more speech or safety net`)\n }\n } else {\n // No accumulated transcript - nothing to send\n // Clear safety net and reset states\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n this.resetSpeakingStates()\n Logger.debug(`${LOG_PREFIX} UtteranceEnd: no transcript, reset speaking states`)\n }\n }\n\n /**\n * Reset the safety net timeout. 
Called whenever we receive a Deepgram event.\n * If we're in a speaking state and have accumulated transcript, start a new timeout.\n */\n private resetSafetyNetTimeout(): void {\n // Clear any existing timeout\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n\n // Only set timeout if we're in a speaking state with accumulated transcript\n if ((this.isUiShowingSpeaking || this.isUserCurrentlySpeaking) && this.accumulatedTranscript.trim() !== '') {\n this.safetyNetTimeoutId = setTimeout(() => {\n this.triggerSafetyNet()\n }, this.options.safetyNetTimeoutMs)\n }\n }\n\n /**\n * Safety net trigger - called when no Deepgram events received for safetyNetTimeoutMs\n * while in a speaking state with accumulated transcript. This handles truly stuck states.\n */\n private triggerSafetyNet(): void {\n Logger.warn(`${LOG_PREFIX} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`)\n Logger.debug(`${LOG_PREFIX} Safety net triggered: accumulated=\"${this.accumulatedTranscript}\", ` +\n `uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, ` +\n `timeSinceLastEvent=${Date.now() - this.lastDeepgramEventTime}ms`)\n\n this.sendAccumulatedTranscript(AccumulatedTranscriptSource.SafetyNet)\n this.resetSpeakingStates()\n this.safetyNetTimeoutId = null\n }\n\n /**\n * Send the accumulated transcript as a final result.\n * Handles emitting SpeechTranscriptionMessage, gating by word threshold, and sending ChatPrompt.\n * @param source - Description of what triggered this (for logging)\n */\n private sendAccumulatedTranscript(source: AccumulatedTranscriptSource): void {\n if (this.accumulatedTranscript.trim() === '') {\n return\n }\n\n if (source !== AccumulatedTranscriptSource.SafetyNet && !this.endsWithPunctuation(this.accumulatedTranscript)) {\n Logger.debug(`${LOG_PREFIX} sendAccumulatedTranscript: not sending as source is ${source} and transcript 
doesn't end in punctuation: \"${this.accumulatedTranscript}\"`)\n return\n }\n\n const threshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n\n // Calculate weighted average confidence from accumulated chunks\n const avgConfidence = this.accumulatedWordCount > 0\n ? this.accumulatedConfidenceSum / this.accumulatedWordCount\n : 1.0\n\n // Emit final transcription message for closed captions\n const result: SpeechTranscriptionResult = {\n transcript: this.accumulatedTranscript,\n final: true,\n confidence: avgConfidence,\n language_code: this.options.language || ''\n }\n this.clientMsgSend(new SpeechTranscriptionMessage(result))\n\n // Gate chat prompts during digital human speaking by word threshold\n const wordCount = this.accumulatedTranscript.trim().split(/\\s+/).length\n if (this.digitalHumanSpeaking && wordCount < threshold) {\n Logger.debug(`${LOG_PREFIX} Discarding utterance during speaking (${wordCount} words < ${threshold} threshold): \"${this.accumulatedTranscript}\"`)\n } else {\n Logger.info(`${LOG_PREFIX} ${source}: \"${this.accumulatedTranscript}\"`)\n this.sendChatPrompt(this.accumulatedTranscript)\n }\n\n // Reset accumulated state\n this.resetAccumulatedState()\n }\n\n /**\n * Check if a string ends with sentence-ending punctuation.\n * @param text - The text to check\n * @returns true if the text ends with punctuation (. ! ? 
etc.)\n */\n private endsWithPunctuation(text: string): boolean {\n const trimmed = text.trim()\n if (trimmed.length === 0) {\n return false\n }\n // Match common sentence-ending punctuation marks\n return /[.!?;:]$/.test(trimmed)\n }\n\n /**\n * Reset accumulated transcript and confidence state.\n */\n private resetAccumulatedState(): void {\n this.accumulatedTranscript = ''\n this.accumulatedConfidenceSum = 0\n this.accumulatedWordCount = 0\n }\n\n /**\n * Reset speaking states and send appropriate stop messages.\n */\n private resetSpeakingStates(): void {\n if (this.isUserCurrentlySpeaking) {\n this.isUserCurrentlySpeaking = false\n this.dataChannelMsgSend(new UserSpeaking(UserSpeakingState.Stop))\n }\n\n if (this.isUiShowingSpeaking) {\n this.isUiShowingSpeaking = false\n this.clientMsgSend(new UserStoppedSpeakingMessage())\n }\n }\n\n private handleConnectionOpen(): void {\n // Don't overwrite Paused state (user may have paused during async connection)\n if (this.state !== STTState.Paused) {\n this.state = STTState.Connected\n }\n }\n\n private handleConnectionClose(): void {\n Logger.info(`${LOG_PREFIX} Connection closed`)\n\n // If user has paused, don't overwrite Paused state and don't reconnect.\n // The connection closing while paused is expected (Deepgram closes idle connections).\n // resume() will reconnect when the user unmutes.\n if (this.state === STTState.Paused) {\n Logger.info(`${LOG_PREFIX} Connection closed while paused — will reconnect on resume`)\n this.connection = null\n this.stopMicrophone()\n return\n }\n\n this.state = STTState.Disconnected\n this.clientMsgSend(new EnableMicrophoneUpdatedMessage(false))\n\n // Attempt reconnection with exponential backoff\n if (this.shouldReconnect) {\n Logger.info(`${LOG_PREFIX} Unexpected disconnect, attempting reconnection...`)\n this.scheduleReconnect()\n }\n }\n\n private handleError(error: unknown): void {\n Logger.error(`${LOG_PREFIX} Error occurred`, error)\n\n const errorMessage = error 
instanceof Error\n ? error.message\n : String(error)\n\n // Check if it's a microphone/device error\n if (errorMessage.includes('microphone') || errorMessage.includes('permission') || errorMessage.includes('getUserMedia')) {\n this.clientMsgSend(new DeviceErrorMessage(new Error(errorMessage)))\n } else {\n this.clientMsgSend(new SessionErrorMessage(errorMessage))\n }\n }\n\n private sendChatPrompt(transcript: string): void {\n if (transcript && transcript.trim() !== '') {\n // Set the user's spoken locale if detected\n if (this.options.language) {\n this.options.promptMetadata.userSpokenLocale = this.options.language\n }\n\n this.dataChannelMsgSend(new ChatPrompt(transcript, this.options.promptMetadata))\n }\n }\n\n private handleSpeakingEnd(): void {\n this.digitalHumanSpeaking = false\n }\n\n // Send a message on the data channel to renderer\n private dataChannelMsgSend(msg: DataChannelMessage): void {\n this.options.sendMessage(msg)\n }\n\n // Send a message to the client implementation, i.e. 
hosted experience\n private clientMsgSend(msg: UneeqMessage): void {\n this.options.messages.next(msg)\n }\n}\n"],"names":["LOG_PREFIX","AccumulatedTranscriptSource","STTState","DeepgramSTT","options","connection","state","Idle","shouldReconnect","microphone","stream","reconnectAttempts","reconnectDelay","reconnectTimeoutId","digitalHumanSpeaking","isUserCurrentlySpeaking","isUiShowingSpeaking","accumulatedTranscript","accumulatedConfidenceSum","accumulatedWordCount","lastDeepgramEventTime","safetyNetTimeoutId","constructor","this","model","language","smartFormat","interimResults","utteranceEndMs","vadEvents","fillerWords","endpointing","echoCancellation","noiseSuppression","autoGainControl","interruptionWordThreshold","noDelay","safetyNetTimeoutMs","warn","info","handleAppMessages","startRecognition","resetReconnectionState","connect","stopRecognition","clearReconnectTimeout","disconnect","pause","Paused","clearTimeout","resetAccumulatedState","resetSpeakingStates","getTracks","forEach","track","enabled","debug","resume","Connected","startMicrophone","Disconnected","setChatMetadata","chatMetadata","promptMetadata","Connecting","tokenData","getToken","deepgram","accessToken","token","baseUrl","api_url","connectionOptions","smart_format","String","interim_results","utterance_end_ms","vad_events","filler_words","mip_opt_out","keyterms","length","keyterm","queryParams","no_delay","v1Listen","listen","v1","Promise","race","waitForOpen","_","reject","setTimeout","Error","setupEventHandlers","error","serialiseError","handleError","scheduleReconnect","stopMicrophone","close","clientMsgSend","Math","min","tokenEndpoint","connectionUrl","encodeURIComponent","response","fetch","method","headers","Authorization","jwtToken","ok","status","statusText","json","navigator","mediaDevices","getUserMedia","audio","deviceId","microphoneDeviceId","exact","undefined","MediaRecorder","mimeType","audioBitsPerSecond","ondataavailable","event","data","size","arrayBuffer","then","sendMedia","
catch","start","JSON","stringify","stop","on","handleConnectionOpen","typed","type","handleTranscript","handleUtteranceEnd","handleConnectionClose","messages","subscribe","msg","uneeqMessageType","AvatarStartedSpeaking","PromptResult","promptResult","success","handleSpeakingEnd","AvatarAnswer","answerSpeech","replace","AvatarStoppedSpeaking","SessionEnded","SessionReconnecting","CustomMetadataUpdated","SessionBackendError","Date","now","channel","alternatives","alternative","transcript","isFinal","is_final","speechFinal","speech_final","substring","endsWithPunctuation","sendAccumulatedTranscript","FinalTranscript","processTranscriptChunk","confidence","updateSpeakingAndInterruptionState","emitTranscriptionResult","resetSafetyNetTimeout","chunkWordCount","trim","split","chunkConfidence","wordCount","threshold","willTriggerAction","fullTranscript","speechLooksLikeFinal","dataChannelMsgSend","Start","Stop","result","final","language_code","fullWordCount","sendChatPrompt","last_word_end","UtteranceEndFallback","triggerSafetyNet","SafetyNet","source","avgConfidence","text","trimmed","test","errorMessage","message","includes","userSpokenLocale","sendMessage","next"],"sourceRoot":""}
1
+ {"version":3,"file":"3.index.js","mappings":"oPA2CA,MAAMA,EAAa,iBAenB,IAAKC,EAOAC,GAPL,SAAKD,GACD,wDACA,+CACA,wBACH,CAJD,CAAKA,IAAAA,EAA2B,KAOhC,SAAKC,GACD,cACA,0BACA,wBACA,kBACA,6BACH,CAND,CAAKA,IAAAA,EAAQ,KAqHN,MAAMC,EAkCoBC,QAjCrBC,WAA4C,KAC5CC,MAAkBJ,EAASK,KAC3BC,iBAA2B,EAC3BC,WAAmC,KACnCC,OAA6B,KAG7BC,kBAA4B,EAC5BC,eA9IuB,IA+IvBC,mBAA4C,KAG5CC,sBAAgC,EAGhCC,yBAAmC,EAGnCC,qBAA+B,EAK/BC,sBAAgC,GAGhCC,yBAAmC,EACnCC,qBAA+B,EAG/BC,sBAAgC,EAChCC,mBAA4C,KAEpD,WAAAC,CAA6BlB,GAAA,KAAAA,QAAAA,EAEzBmB,KAAKnB,QAAQoB,MAAQD,KAAKnB,QAAQoB,OAAS,SAC3CD,KAAKnB,QAAQqB,SAAWF,KAAKnB,QAAQqB,UAAY,KACjDF,KAAKnB,QAAQsB,YAAcH,KAAKnB,QAAQsB,cAAe,EACvDH,KAAKnB,QAAQuB,eAAiBJ,KAAKnB,QAAQuB,iBAAkB,EAC7DJ,KAAKnB,QAAQwB,eAAiBL,KAAKnB,QAAQwB,gBAAkB,KAC7DL,KAAKnB,QAAQyB,UAAYN,KAAKnB,QAAQyB,YAAa,EACnDN,KAAKnB,QAAQ0B,YAAcP,KAAKnB,QAAQ0B,cAAe,EACvDP,KAAKnB,QAAQ2B,YAAcR,KAAKnB,QAAQ2B,aAAe,IAKvDR,KAAKnB,QAAQ4B,iBAAmBT,KAAKnB,QAAQ4B,mBAAoB,EACjET,KAAKnB,QAAQ6B,iBAAmBV,KAAKnB,QAAQ6B,mBAAoB,EACjEV,KAAKnB,QAAQ8B,gBAAkBX,KAAKnB,QAAQ8B,kBAAmB,EAG/DX,KAAKnB,QAAQ+B,0BAA4BZ,KAAKnB,QAAQ+B,2BAvH1B,EAwH5BZ,KAAKnB,QAAQgC,QAAUb,KAAKnB,QAAQgC,UAAW,EAC/Cb,KAAKnB,QAAQiC,mBAAqBd,KAAKnB,QAAQiC,oBAtLjB,IAwL1Bd,KAAKnB,QAAQiC,oBAAsB,KACnC,IAAOC,KAAK,GAAGtC,kCAA2CuB,KAAKnB,QAAQiC,uIACvEd,KAAKnB,QAAQiC,mBA1La,KA2LnBd,KAAKnB,QAAQiC,oBAAsB,IAC1C,IAAOC,KAAK,GAAGtC,kCAA2CuB,KAAKnB,QAAQiC,0HAEvE,IAAOE,KAAK,GAAGvC,kCAA2CuB,KAAKnB,QAAQiC,yBAG3Ed,KAAKiB,mBACT,CAGO,sBAAMC,GACT,IAAOF,KAAK,GAAGvC,iCACfuB,KAAKf,iBAAkB,EACvBe,KAAKmB,+BACCnB,KAAKoB,SACf,CAEO,qBAAMC,GACT,IAAOL,KAAK,GAAGvC,iCACfuB,KAAKf,iBAAkB,EACvBe,KAAKsB,8BACCtB,KAAKuB,YACf,CAEO,WAAMC,GAwBT,OAvBA,IAAOR,KAAK,GAAGvC,gCACfuB,KAAKjB,MAAQJ,EAAS8C,OAGlBzB,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAI9BE,KAAK2B,wBAGL3B,KAAK4B,sBAGD5B,KAAKb,SACLa,KAAKb,OAAO0C,YAAYC,QAASC,IAAYA,EAAMC,SAAU,IAC7D,IAAOC,MAAM,GAAGxD,6BAKb,CACX,CAEO,YAAMyD,GAGT,GAFA,IAAOlB,KAAK,GAAGvC,iCAEXuB,KAAKjB,QAAUJ,EAAS8C,OAAQ,CAChC,GAAIzB,KAAKb,OAKL,OAHAa,KAAKjB,MAAQJ,EAASwD,UACtBnC,KAAKb,OAAO0C,YAAYC
,QAASC,IAAYA,EAAMC,SAAU,IAC7D,IAAOC,MAAM,GAAGxD,8BACT,EAGX,GAAIuB,KAAKlB,WAGL,OAFAkB,KAAKjB,MAAQJ,EAASwD,gBAChBnC,KAAKoC,mBACJ,EAGXpC,KAAKjB,MAAQJ,EAAS0D,YAC1B,CAKA,OAFA,IAAOJ,MAAM,GAAGxD,iCACVuB,KAAKoB,WACJ,CACX,CAGO,eAAAkB,CAAgBC,GACnBvC,KAAKnB,QAAQ2D,eAAiBD,CAClC,CAGQ,aAAMnB,GACV,GAAIpB,KAAKjB,QAAUJ,EAASwD,UAK5B,GAAInC,KAAKjB,QAAUJ,EAAS8D,WAA5B,CAKAzC,KAAKjB,MAAQJ,EAAS8D,WAEtB,IACI,MAAMC,QAAkB1C,KAAK2C,WAC7B,IAAO3B,KAAK,GAAGvC,4BAIf,MAAMmE,EAAW,IAAI,IAAe,CAChCC,YAAaH,EAAUI,MACvBC,QAASL,EAAUM,UAMjBC,EAAoB,CACtBhD,MAAOD,KAAKnB,QAAQoB,MACpBC,SAAUF,KAAKnB,QAAQqB,SACvBgD,aAAcC,OAAOnD,KAAKnB,QAAQsB,aAClCiD,gBAAiBD,OAAOnD,KAAKnB,QAAQuB,gBACrCiD,iBAAkBrD,KAAKnB,QAAQwB,eAC/BiD,WAAYH,OAAOnD,KAAKnB,QAAQyB,WAChCiD,aAAcJ,OAAOnD,KAAKnB,QAAQ0B,aAClCC,YAAaR,KAAKnB,QAAQ2B,YAG1BgD,YAAa,UAGTxD,KAAKnB,QAAQ4E,UAAYzD,KAAKnB,QAAQ4E,SAASC,OAAS,GAAK,CAAEC,QAAS3D,KAAKnB,QAAQ4E,aAErFzD,KAAKnB,QAAQgC,SAAW,CAAE+C,YAAa,CAAEC,SAAU,UAMrDC,EAAWlB,EAASmB,OA2B1B,GA1BA/D,KAAKlB,iBAAmBgF,EAASE,GAAG5C,QAAQ6B,GAK5CjD,KAAKlB,WAAWsC,gBACV6C,QAAQC,KAAK,CACflE,KAAKlB,WAAWqF,cAChB,IAAIF,QAAc,CAACG,EAAGC,IAClBC,WAAW,IAAMD,EAAO,IAAIE,MAAM,uBArVxB,QA2VbvE,KAAKjB,QAAuBJ,EAAS8C,SACtCzB,KAAKjB,MAAQJ,EAASwD,WAE1B,IAAOnB,KAAK,GAAGvC,uBAGfuB,KAAKwE,qBAKAxE,KAAKjB,QAAuBJ,EAAS8C,OAGtC,OAFA,IAAOT,KAAK,GAAGvC,6DACfuB,KAAKmB,+BAKHnB,KAAKoC,kBAEX,IAAOpB,KAAK,GAAGvC,4BAGfuB,KAAKmB,wBACT,CAAE,MAAOsD,GACLzE,KAAKjB,MAAQJ,EAAS0D,aACtB,IAAOoC,MAAM,GAAGhG,qBAA+B,IAAOiG,eAAeD,IAIjEzE,KAAKf,kBACLe,KAAK2E,mBAAmBF,GACxBzE,KAAK4E,oBAEb,CA1FA,MAFI,IAAO7D,KAAK,GAAGtC,yCALf,IAAOsC,KAAK,GAAGtC,sBAkGvB,CAEQ,gBAAM8C,GACV,GAAIvB,KAAKjB,QAAUJ,EAASK,OAASgB,KAAKjB,QAAUJ,EAAS0D,cAAiBrC,KAAKlB,YAAnF,CAIA,IAAOkC,KAAK,GAAGvC,mBAGXuB,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAG9B,IACIE,KAAK6E,iBAED7E,KAAKlB,aACLkB,KAAKlB,WAAWgG,QAChB9E,KAAKlB,WAAa,KAE1B,CAAE,MAAO2F,GACL,IAAOA,MAAM,GAAGhG,qBAA+B,IAAOiG,eAAeD,GACzE,CAGAzE,KAAK2B,wBACL3B,KAAK4B,sBAEL5B,KAAKjB,MAAQJ,EAAS0D,aACtBrC,KAAK+E,cAAc,IAAI,MAA+B,GA1BtD,CA2BJ,CAEQ,iBAAAH,GACJ,GAAI5E,KAAK
Z,mBAzZc,EA8ZnB,OAJA,IAAOqF,MAAM,GAAGhG,gDAChBuB,KAAK+E,cAAc,IAAI,KACnB,qEAKR/E,KAAKZ,oBACL,IAAO4B,KACH,GAAGvC,qCAA8CuB,KAAKZ,0BAChDY,KAAKX,oBAGfW,KAAKV,mBAAqBgF,WAAW,KAC5BtE,KAAKoB,WACXpB,KAAKX,gBAGRW,KAAKX,eAAiB2F,KAAKC,IA7aE,EA8azBjF,KAAKX,eA/ac,IAkb3B,CAEQ,sBAAA8B,GACJnB,KAAKZ,kBAAoB,EACzBY,KAAKX,eAvbsB,IAwb3BW,KAAKsB,uBACT,CAEQ,qBAAAA,GACAtB,KAAKV,qBACLoC,aAAa1B,KAAKV,oBAClBU,KAAKV,mBAAqB,KAElC,CAEQ,cAAMqD,GAEV,MAAM1C,EAAQD,KAAKnB,QAAQoB,OAAS,SAC9BiF,EAAgB,GAAGlF,KAAKnB,QAAQsG,iEAAiEC,mBAAmBnF,KAEpHoF,QAAiBC,MAAMJ,EAAe,CACxCK,OAAQ,MACRC,QAAS,CACLC,cAAe,UAAUzF,KAAKnB,QAAQ6G,WACtC,eAAgB,sBAIxB,IAAKL,EAASM,GACV,MAAM,IAAIpB,MAAM,uBAAuBc,EAASO,UAAUP,EAASQ,cAGvE,aAAaR,EAASS,MAC1B,CAEQ,qBAAM1D,GACV,IAQI,GAPA,IAAOpB,KAAK,GAAGvC,yBAIfuB,KAAK6E,kBAGAkB,UAAUC,cAAcC,aACzB,MAAM,IAAI1B,MAAM,sDAYpB,GAVAvE,KAAKb,aAAe4G,UAAUC,aAAaC,aAAa,CACpDC,MAAO,CACHC,SAAUnG,KAAKnB,QAAQuH,mBAAqB,CAAEC,MAAOrG,KAAKnB,QAAQuH,yBAAuBE,EACzF7F,iBAAkBT,KAAKnB,QAAQ4B,iBAC/BC,iBAAkBV,KAAKnB,QAAQ6B,iBAC/BC,gBAAiBX,KAAKnB,QAAQ8B,mBAKjCX,KAAKjB,QAAuBJ,EAAS8C,OAGtC,OAFA,IAAOT,KAAK,GAAGvC,2EACfuB,KAAKb,OAAO0C,YAAYC,QAASC,IAAYA,EAAMC,SAAU,IAMjEhC,KAAKd,WAAa,IAAIqH,cAAcvG,KAAKb,OAAQ,CAC7CqH,SAAU,yBACVC,mBAzfc,OA4flBzG,KAAKd,WAAWwH,gBAAmBC,IAC3BA,EAAMC,KAAKC,KAAO,GAAK7G,KAAKlB,YAAckB,KAAKjB,QAAUJ,EAASwD,WAElEwE,EAAMC,KAAKE,cAAcC,KAAMD,IAC3B9G,KAAKlB,YAAYkI,UAAUF,KAC5BG,MAAOxC,IACN,IAAOA,MAAM,GAAGhG,gCAA0C,IAAOiG,eAAeD,OAM5FzE,KAAKd,WAAWgI,MAzgBL,KA0gBX,IAAOjF,MAAM,GAAGxD,wBAGhBuB,KAAK+E,cAAc,IAAI,MAA+B,GAC1D,CAAE,MAAON,GACL,IAAOA,MAAM,GAAGhG,qBAA+B,IAAOiG,eAAeD,IACrEzE,KAAK+E,cAAc,IAAI,KAAmB,IAAIR,MAAM4C,KAAKC,UAAU3C,KACvE,CACJ,CAEQ,cAAAI,GACA7E,KAAKd,YAAwC,cAA1Bc,KAAKd,WAAWH,QACnCiB,KAAKd,WAAWmI,OAChBrH,KAAKd,WAAa,MAGlBc,KAAKb,SACLa,KAAKb,OAAO0C,YAAYC,QAASC,IAC7BA,EAAMsF,SAEVrH,KAAKb,OAAS,MAGlB,IAAO8C,MAAM,GAAGxD,uBACpB,CAEQ,kBAAA+F,GACCxE,KAAKlB,aAMVkB,KAAKlB,WAAWwI,GAAG,OAAQ,KACvBtH,KAAKuH,yBAKTvH,KAAKlB,WAAWwI,GAAG,UAAYV,IAC3B,GAAa,OAATA,GAAiC,iBAATA,GAAqB,SAAUA,EAAM,CAC7D,MAAMY,EAAQZ,EACK,YAAfY,EAAMC,KA
CNzH,KAAK0H,iBAAiBd,GACA,iBAAfY,EAAMC,KAGbzH,KAAK2H,mBAAmBf,GACF,aAAfY,EAAMC,MACb,IAAOxF,MAAM,GAAGxD,aAAuBmI,EAE/C,IAGJ5G,KAAKlB,WAAWwI,GAAG,QAAS,KACxBtH,KAAK4H,0BAGT5H,KAAKlB,WAAWwI,GAAG,QAAU7C,IACzB,IAAOA,MAAM,GAAGhG,0BAAoC,IAAOiG,eAAeD,IAC1EzE,KAAK2E,mBAAmBF,KAEhC,CAEQ,iBAAAxD,GACJjB,KAAKnB,QAAQgJ,SAASC,UAAWC,IAC7B,OAAQA,EAAIC,kBACZ,KAAK,KAAiBC,sBAClBjI,KAAKT,sBAAuB,EAC5B,MAEJ,KAAK,KAAiB2I,aACUH,EACHI,aAAaC,SAElCpI,KAAKqI,oBAET,MAGJ,KAAK,KAAiBC,aAEkC,KADrCP,EACJQ,aAAaC,QAAQ,WAAY,KAExCxI,KAAKqI,oBAET,MAGJ,KAAK,KAAiBI,sBAClBzI,KAAKqI,oBACL,MAGJ,KAAK,KAAiBK,aAClB1I,KAAKf,iBAAkB,EAClBe,KAAKqB,kBACV,MAGJ,KAAK,KAAiBsH,oBAClB3I,KAAKqI,oBACLrI,KAAKf,iBAAkB,EAClBe,KAAKqB,kBACV,MAGJ,KAAK,KAAiBuH,sBAClB5I,KAAKnB,QAAQ2D,eAAkBuF,EAA8BxF,aAC7D,MAGJ,KAAK,KAAiBsG,oBAClB7I,KAAKqI,sBAOjB,CAOQ,gBAAAX,CAAiBd,GACrB,IACI5G,KAAKH,sBAAwBiJ,KAAKC,MAElC,MAAMC,EAAUpC,EAAKoC,QACrB,IAAKA,GAASC,cAAgD,IAAhCD,EAAQC,aAAavF,OAC/C,OAGJ,MAAMwF,EAAcF,EAAQC,aAAa,GACnCE,EAAqBhG,OAAO+F,EAAYC,YAAc,IACtDC,EAAUxC,EAAKyC,SACfC,EAAc1C,EAAK2C,aASzB,GANA,IAAOtH,MAAM,GAAGxD,gCAAyC2K,mBAAyBE,kBAC/DH,EAAWK,UAAU,EAAG,MAAML,EAAWzF,OAAS,GAAK,MAAQ,qBAC9D1D,KAAKN,sBAAsB8J,UAAU,EAAG,MAAMxJ,KAAKN,sBAAsBgE,OAAS,GAAK,MAAQ,mBACjG1D,KAAKP,qCAAqCO,KAAKR,2BAG9C,KAAf2J,GAAqBG,EAOrB,YANItJ,KAAKyJ,oBAAoBzJ,KAAKN,wBAC9BM,KAAK0J,0BAA0BhL,EAA4BiL,iBAC3D3J,KAAK4B,uBAEL,IAAOK,MAAM,GAAGxD,mGAKxB,GAAmB,KAAf0K,EACA,OAGJnJ,KAAK4J,uBAAuBT,EAAYD,EAAYW,WAAYT,EAASE,GACzEtJ,KAAK8J,mCAAmCX,EAAYG,GACpDtJ,KAAK+J,wBAAwBZ,EAAYD,EAAYW,WAAYP,GAEjEtJ,KAAKgK,uBACT,CAAE,MAAOvF,GACL,IAAOA,MAAM,GAAGhG,gCAA0C,IAAOiG,eAAeD,GACpF,CACJ,CASQ,sBAAAmF,CAAuBT,EAAoBU,EAAoBT,EAAkBE,GACrF,GAAIF,IAAYE,EAAa,CACU,KAA/BtJ,KAAKN,wBACLM,KAAKN,uBAAyB,KAElCM,KAAKN,uBAAyByJ,EAE9B,MAAMc,EAAiBd,EAAWe,OAAOC,MAAM,OAAOzG,OAChD0G,EAAkBP,GAAc,EACtC7J,KAAKL,0BAA4ByK,EAAkBH,EACnDjK,KAAKJ,sBAAwBqK,EAE7B,IAAOhI,MAAM,GAAGxD,8BAAuCuB,KAAKN,yBAChE,CACJ,CAQQ,kCAAAoK,CAAmCX,EAAoBG,GAC3D,MACMe,GADqD,KAA/BrK,KAAKN,sBAA+BM,KAAKN,sBAAwByJ,GACvDe,OAAOC,MAAM,OAAOzG,OACpD4G,EAAYtK,KAAKnB,QAAQ+B,
2BA/oBH,EAipBvBZ,KAAKP,sBACNO,KAAKP,qBAAsB,EAC3BO,KAAK+E,cAAc,IAAI,OAG3B,MAAMwF,GAAqBvK,KAAKT,sBAAwB8K,GAAaC,EAE/DE,EAAgD,KAA/BxK,KAAKN,sBACtBM,KAAKN,sBAAwB,IAAMyJ,EACnCA,EACAsB,EAAuBnB,GAAetJ,KAAKyJ,oBAAoBe,IAEhExK,KAAKR,yBAA2B+K,IACjCvK,KAAKR,yBAA0B,EAC/BQ,KAAK0K,mBAAmB,IAAI,IAAa,IAAkBC,SAG3D3K,KAAKT,sBAAwB8K,GAAaC,IAC1C,IAAOrI,MAAM,GAAGxD,gCAChBuB,KAAK0K,mBAAmB,IAAI,KAC5B1K,KAAK+E,cAAc,IAAI,MACvB/E,KAAKT,sBAAuB,GAG5BS,KAAKR,yBAA2BiL,IAChCzK,KAAKR,yBAA0B,EAC/BQ,KAAK0K,mBAAmB,IAAI,IAAa,IAAkBE,QAG3D5K,KAAKP,qBAAuBgL,IAC5BzK,KAAKP,qBAAsB,EAC3BO,KAAK+E,cAAc,IAAI,MAE/B,CASQ,uBAAAgF,CAAwBZ,EAAoBU,EAAoBP,GACpE,MAAMkB,EAAgD,KAA/BxK,KAAKN,sBACtBM,KAAKN,sBAAwB,IAAMyJ,EACnCA,EACAsB,EAAuBnB,GAAetJ,KAAKyJ,oBAAoBe,GAE/DK,EAAoC,CACtC1B,WAAYG,EAAckB,EAAiBrB,EAC3C2B,MAAOL,EACPZ,WAAYA,GAAc,EAC1BkB,cAAe/K,KAAKnB,QAAQqB,UAAY,IAM5C,GAFAF,KAAK+E,cAAc,IAAI,KAA2B8F,IAE9CJ,EAAsB,CACtB,MAAMO,EAAgBR,EAAeN,OAAOC,MAAM,OAAOzG,OACnD4G,EAAYtK,KAAKnB,QAAQ+B,2BA7sBP,EA8sBpBZ,KAAKT,sBAAwByL,EAAgBV,EAC7C,IAAOrI,MAAM,GAAGxD,2CAAoDuM,aAAyBV,kBAA0BE,OAEvH,IAAOxJ,KAAK,GAAGvC,wBAAiC+L,MAChDxK,KAAKiL,eAAeT,IAExBxK,KAAK2B,uBACT,MAAO,GAAI2H,EAAa,CACe,KAA/BtJ,KAAKN,wBACLM,KAAKN,uBAAyB,KAElCM,KAAKN,uBAAyByJ,EAE9B,MAAMc,EAAiBd,EAAWe,OAAOC,MAAM,OAAOzG,OACtD1D,KAAKL,2BAA6BkK,GAAc,GAAOI,EACvDjK,KAAKJ,sBAAwBqK,EAE7B,IAAOhI,MAAM,GAAGxD,oEAA6EuB,KAAKN,yBACtG,CACJ,CAOQ,kBAAAiI,CAAmBf,GACvB5G,KAAKH,sBAAwBiJ,KAAKC,MAElC,IAAO9G,MAAM,GAAGxD,gDAAyDmI,GAAMsE,+BAC3DlL,KAAKN,sBAAsB8J,UAAU,EAAG,uBAC1CxJ,KAAKP,qCAAqCO,KAAKR,2BAGvB,KAAtCQ,KAAKN,sBAAsBwK,OACvBlK,KAAKyJ,oBAAoBzJ,KAAKN,wBAG1BM,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAE9B,IAAOmC,MAAM,GAAGxD,uDAAgEuB,KAAKN,0BACrFM,KAAK0J,0BAA0BhL,EAA4ByM,sBAC3DnL,KAAK4B,sBACL,IAAOK,MAAM,GAAGxD,0CAKhB,IAAOwD,MAAM,GAAGxD,0EAKhBuB,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,MAE9BE,KAAK4B,sBACL,IAAOK,MAAM,GAAGxD,wDAExB,CAMQ,qBAAAuL,GAEAhK,KAAKF,qBACL4B,aAAa1B,KAAKF,oBAClBE,KAAKF,mBAAqB,OAIzBE,KAAKP,qBAAuBO,KAAKR,0BAAkE,KAAtCQ,KAAKN,sBAAsBwK,SACzFlK,KAAKF,mBAAqBwE,WAAW,KACjC
tE,KAAKoL,oBACNpL,KAAKnB,QAAQiC,oBAExB,CAMQ,gBAAAsK,GACJ,IAAOrK,KAAK,GAAGtC,kDAA2DuB,KAAKnB,QAAQiC,uCACvF,IAAOmB,MAAM,GAAGxD,wCAAiDuB,KAAKN,sCACpDM,KAAKP,qCAAqCO,KAAKR,+CACvCsJ,KAAKC,MAAQ/I,KAAKH,2BAE5CG,KAAK0J,0BAA0BhL,EAA4B2M,WAC3DrL,KAAK4B,sBACL5B,KAAKF,mBAAqB,IAC9B,CAOQ,yBAAA4J,CAA0B4B,GAC9B,GAA0C,KAAtCtL,KAAKN,sBAAsBwK,OAC3B,OAGJ,GAAIoB,IAAW5M,EAA4B2M,YAAcrL,KAAKyJ,oBAAoBzJ,KAAKN,uBAEnF,YADA,IAAOuC,MAAM,GAAGxD,yDAAkE6M,iDAAsDtL,KAAKN,0BAIjJ,MAAM4K,EAAYtK,KAAKnB,QAAQ+B,2BA/zBH,EAk0BtB2K,EAAgBvL,KAAKJ,qBAAuB,EAC5CI,KAAKL,yBAA2BK,KAAKJ,qBACrC,EAGAiL,EAAoC,CACtC1B,WAAYnJ,KAAKN,sBACjBoL,OAAO,EACPjB,WAAY0B,EACZR,cAAe/K,KAAKnB,QAAQqB,UAAY,IAE5CF,KAAK+E,cAAc,IAAI,KAA2B8F,IAGlD,MAAMR,EAAYrK,KAAKN,sBAAsBwK,OAAOC,MAAM,OAAOzG,OAC7D1D,KAAKT,sBAAwB8K,EAAYC,EACzC,IAAOrI,MAAM,GAAGxD,2CAAoD4L,aAAqBC,kBAA0BtK,KAAKN,2BAExH,IAAOsB,KAAK,GAAGvC,KAAc6M,OAAYtL,KAAKN,0BAC9CM,KAAKiL,eAAejL,KAAKN,wBAI7BM,KAAK2B,uBACT,CAOQ,mBAAA8H,CAAoB+B,GACxB,MAAMC,EAAUD,EAAKtB,OACrB,OAAuB,IAAnBuB,EAAQ/H,QAIL,WAAWgI,KAAKD,EAC3B,CAKQ,qBAAA9J,GACJ3B,KAAKN,sBAAwB,GAC7BM,KAAKL,yBAA2B,EAChCK,KAAKJ,qBAAuB,CAChC,CAKQ,mBAAAgC,GACA5B,KAAKR,0BACLQ,KAAKR,yBAA0B,EAC/BQ,KAAK0K,mBAAmB,IAAI,IAAa,IAAkBE,QAG3D5K,KAAKP,sBACLO,KAAKP,qBAAsB,EAC3BO,KAAK+E,cAAc,IAAI,MAE/B,CAEQ,oBAAAwC,GAEAvH,KAAKjB,QAAUJ,EAAS8C,SACxBzB,KAAKjB,MAAQJ,EAASwD,UAE9B,CAEQ,qBAAAyF,GAMJ,GALA,IAAO5G,KAAK,GAAGvC,uBAKXuB,KAAKjB,QAAUJ,EAAS8C,OAIxB,OAHA,IAAOT,KAAK,GAAGvC,+DACfuB,KAAKlB,WAAa,UAClBkB,KAAK6E,iBAIT7E,KAAKjB,MAAQJ,EAAS0D,aACtBrC,KAAK+E,cAAc,IAAI,MAA+B,IAGlD/E,KAAKf,kBACL,IAAO+B,KAAK,GAAGvC,uDACfuB,KAAK4E,oBAEb,CAEQ,cAAAqG,CAAe9B,GACfA,GAAoC,KAAtBA,EAAWe,SAErBlK,KAAKnB,QAAQqB,WACbF,KAAKnB,QAAQ2D,eAAemJ,iBAAmB3L,KAAKnB,QAAQqB,UAGhEF,KAAK0K,mBAAmB,IAAI,IAAWvB,EAAYnJ,KAAKnB,QAAQ2D,iBAExE,CAEQ,iBAAA6F,GACJrI,KAAKT,sBAAuB,CAChC,CAUQ,kBAAAoF,CAAmBF,GACvB,MAAMmH,EAAUnH,aAAiBF,MAAQE,EAAMmH,QAAUzI,OAAOsB,GAChEzE,KAAK+E,cAAc,IAAI,KAAuC6G,GAClE,CAGQ,kBAAAlB,CAAmB3C,GACvB/H,KAAKnB,QAAQgN,YAAY9D,EAC7B,CAGQ,aAAAhD,CAAcgD,GAClB/H,KAAKnB,QAAQgJ,SAASiE,KAAK/D,
EAC/B,E","sources":["webpack://Uneeq/./src/deepgram-stt.ts"],"sourcesContent":["import { type Subject } from 'rxjs'\nimport Logger from './lib/logger'\nimport {\n UserStartedSpeakingMessage,\n UserStoppedSpeakingMessage,\n SpeechTranscriptionMessage,\n EnableMicrophoneUpdatedMessage,\n SessionErrorMessage,\n SpeechRecognitionTransientErrorMessage,\n DeviceErrorMessage,\n AvatarInterruptedMessage,\n type UneeqMessage,\n UneeqMessageType,\n type PromptResultMessage,\n type AvatarAnswerMessage,\n type CustomMetadataUpdated,\n} from './types/UneeqMessages'\nimport { type SpeechTranscriptionResult } from './types/SpeechTranscriptionResult'\nimport { type DataChannelMessage } from './webrtc-data-channel/DataChannelMessage'\nimport { UserSpeaking, UserSpeakingState } from './webrtc-data-channel/messages/UserSpeaking'\nimport { ChatPrompt } from './webrtc-data-channel/messages/ChatPrompt'\nimport { StopSpeaking } from './webrtc-data-channel/messages/StopSpeaking'\nimport { type PromptMetadata } from './types/PromptMetadata'\nimport { DeepgramClient } from '@deepgram/sdk'\nimport { type SpeechRecognitionInterface } from './types/SpeechRecognitionInterface'\n\n// Local interface for the Deepgram v5 live connection — duck-typed to avoid coupling to SDK type names\ninterface DeepgramLiveConnection {\n on(event: string, handler: (...args: unknown[]) => void): void\n sendMedia(data: ArrayBuffer): void\n connect(): void\n waitForOpen(): Promise<unknown>\n close(): void\n}\n\n// v5 SDK exposes listen.v1.connect() but doesn't type it publicly — duck-typed local interface\ninterface DeepgramV1Listen {\n v1: {\n connect(options: Record<string, unknown>): Promise<DeepgramLiveConnection>\n }\n}\n\n// Constants\nconst LOG_PREFIX = '[Deepgram STT]'\nconst CONNECTION_TIMEOUT_MS = 10000\nconst AUDIO_CHUNK_MS = 250\nconst AUDIO_BITS_PER_SECOND = 48000\n\n// Reconnection constants\nconst INITIAL_RECONNECT_DELAY_MS = 1000\nconst MAX_RECONNECT_DELAY_MS = 30000\nconst 
RECONNECT_BACKOFF_MULTIPLIER = 2\nconst MAX_RECONNECT_ATTEMPTS = 5\n\n// Safety net timeout - triggers if no Deepgram events received while in speaking state\nconst DEFAULT_SAFETY_NET_TIMEOUT_MS = 2000\n\n// Source of accumulated transcript submission\nenum AccumulatedTranscriptSource {\n FinalTranscript = 'Final transcript (from accumulated)',\n UtteranceEndFallback = 'UtteranceEnd fallback',\n SafetyNet = 'Safety net'\n}\n\n// STT Engine States\nenum STTState {\n Idle = 'Idle',\n Connecting = 'Connecting',\n Connected = 'Connected',\n Paused = 'Paused',\n Disconnected = 'Disconnected'\n}\n\n/**\n * Partial interface for Deepgram Live Transcription Response\n * See: https://developers.deepgram.com/reference/listen-live-websocket-messages\n */\ninterface LiveTranscriptionResponse {\n channel: {\n alternatives: Array<{\n transcript: string\n confidence: number\n words?: Array<{\n word: string\n start: number\n end: number\n confidence: number\n }>\n }>\n }\n is_final: boolean\n speech_final: boolean\n metadata: {\n request_id: string\n model_info: {\n name: string\n version: string\n arch: string\n }\n model_uuid: string\n }\n}\n\n/**\n * Interface for Deepgram UtteranceEnd event\n */\ninterface UtteranceEndResponse {\n type: 'UtteranceEnd'\n last_word_end: number\n channel: number[]\n}\n\n// Interruption threshold - minimum words needed to send a chat prompt while digital human is speaking\n// When the digital human is NOT speaking, all utterances are sent regardless of word count\n// Set to 1 for easy interruption, higher values (3-5) to require more intentional speech,\n// or very high (999) to effectively disable interruption\nconst INTERRUPTION_WORD_THRESHOLD = 3\n\nexport interface DeepgramSTTOptions {\n // Backend configuration\n connectionUrl: string\n jwtToken: string\n\n // Session information\n sessionId: string\n\n // Deepgram configuration\n model?: string\n language?: string\n smartFormat?: boolean\n interimResults?: boolean\n utteranceEndMs?: number\n 
vadEvents?: boolean\n encoding?: string\n sampleRate?: number\n channels?: number\n fillerWords?: boolean\n endpointing?: number\n\n // Interruption configuration - minimum words to send a prompt while digital human is speaking\n // Set to 1 for easy interruption, 3-5 for intentional phrases, 999 to disable interruption\n interruptionWordThreshold?: number\n\n /**\n * Keyterms to boost in transcription results.\n * Deepgram will give higher weight to these words/phrases during recognition.\n */\n keyterms?: string[]\n\n /**\n * Remove artificial delay from transcript delivery for lower latency.\n * @default false\n */\n noDelay?: boolean\n\n /**\n * Safety net timeout in milliseconds.\n * Triggers if no Deepgram events (transcripts/VAD) are received while the user is speaking.\n * This prevents the STT engine from getting \"stuck\" when Deepgram fails to detect end-of-speech.\n * Recommended: 3000ms - 10000ms. Defaults to 5000ms.\n */\n safetyNetTimeoutMs?: number\n\n // Microphone configuration\n echoCancellation?: boolean\n noiseSuppression?: boolean\n autoGainControl?: boolean\n microphoneDeviceId?: string\n\n // Metadata and callbacks\n promptMetadata: PromptMetadata\n messages: Subject<UneeqMessage>\n sendMessage: (msg: DataChannelMessage) => void\n}\n\ninterface DeepgramTokenResponse {\n token: string\n api_url: string\n sdk_version: string\n expires_at: string\n}\n\nexport class DeepgramSTT implements SpeechRecognitionInterface {\n private connection: DeepgramLiveConnection | null = null\n private state: STTState = STTState.Idle\n private shouldReconnect: boolean = true\n private microphone: MediaRecorder | null = null\n private stream: MediaStream | null = null\n\n // Reconnection state\n private reconnectAttempts: number = 0\n private reconnectDelay: number = INITIAL_RECONNECT_DELAY_MS\n private reconnectTimeoutId: NodeJS.Timeout | null = null\n\n // Digital human speaking state\n private digitalHumanSpeaking: boolean = false\n\n // User speaking state 
(for data channel messages to Renny - based on transcripts)\n private isUserCurrentlySpeaking: boolean = false\n\n // UI speaking state (for UI indicator - based on transcripts)\n private isUiShowingSpeaking: boolean = false\n\n // Accumulated transcript from is_final results within current utterance\n // Deepgram sends multiple is_final chunks for long utterances, which must be\n // concatenated until speech_final arrives to get the complete transcript\n private accumulatedTranscript: string = ''\n\n // Weighted average confidence: sum(confidence * words) / sum(words)\n private accumulatedConfidenceSum: number = 0\n private accumulatedWordCount: number = 0\n\n // Safety net timeout - tracks last Deepgram event to detect stuck states\n private lastDeepgramEventTime: number = 0\n private safetyNetTimeoutId: NodeJS.Timeout | null = null\n\n constructor(private readonly options: DeepgramSTTOptions) {\n // Apply defaults\n this.options.model = this.options.model || 'nova-3'\n this.options.language = this.options.language || 'en'\n this.options.smartFormat = this.options.smartFormat ?? true\n this.options.interimResults = this.options.interimResults ?? true\n this.options.utteranceEndMs = this.options.utteranceEndMs ?? 1500\n this.options.vadEvents = this.options.vadEvents ?? true\n this.options.fillerWords = this.options.fillerWords ?? false\n this.options.endpointing = this.options.endpointing ?? 500\n\n // Note: encoding, sampleRate, and channels are NOT set here\n // Let Deepgram auto-detect from the WebM/Opus container sent by MediaRecorder\n\n this.options.echoCancellation = this.options.echoCancellation ?? true\n this.options.noiseSuppression = this.options.noiseSuppression ?? true\n this.options.autoGainControl = this.options.autoGainControl ?? true\n\n // Interruption configuration\n this.options.interruptionWordThreshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n this.options.noDelay = this.options.noDelay ?? 
false\n this.options.safetyNetTimeoutMs = this.options.safetyNetTimeoutMs ?? DEFAULT_SAFETY_NET_TIMEOUT_MS\n\n if (this.options.safetyNetTimeoutMs <= 500) {\n Logger.warn(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Ignoring and default the value to ${DEFAULT_SAFETY_NET_TIMEOUT_MS}ms.`)\n this.options.safetyNetTimeoutMs = DEFAULT_SAFETY_NET_TIMEOUT_MS\n } else if (this.options.safetyNetTimeoutMs <= 1000) {\n Logger.warn(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is ${DEFAULT_SAFETY_NET_TIMEOUT_MS}ms.`)\n } else {\n Logger.info(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms.`)\n }\n\n this.handleAppMessages()\n }\n\n // Main lifecycle methods\n public async startRecognition(): Promise<void> {\n Logger.info(`${LOG_PREFIX} Starting speech recognition`)\n this.shouldReconnect = true\n this.resetReconnectionState()\n await this.connect()\n }\n\n public async stopRecognition(): Promise<void> {\n Logger.info(`${LOG_PREFIX} Stopping speech recognition`)\n this.shouldReconnect = false\n this.clearReconnectTimeout()\n await this.disconnect()\n }\n\n public async pause(): Promise<boolean> {\n Logger.info(`${LOG_PREFIX} Pausing speech recognition`)\n this.state = STTState.Paused\n\n // Clear safety net timeout\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n\n // Reset accumulated transcript - any partial speech is discarded on pause\n this.resetAccumulatedState()\n\n // Reset speaking states\n this.resetSpeakingStates()\n\n // Disable audio tracks to stop sending audio, but keep microphone and connection alive\n if (this.stream) {\n this.stream.getTracks().forEach((track) => { track.enabled = false })\n Logger.debug(`${LOG_PREFIX} Audio tracks 
disabled`)\n }\n\n // Note: We don't send EnableMicrophoneUpdatedMessage(false) because the microphone\n // should remain visually enabled in the UI even when paused\n return true\n }\n\n public async resume(): Promise<boolean> {\n Logger.info(`${LOG_PREFIX} Resuming speech recognition`)\n\n if (this.state === STTState.Paused) {\n if (this.stream) {\n // Re-enable existing audio tracks (resume from normal pause)\n this.state = STTState.Connected\n this.stream.getTracks().forEach((track) => { track.enabled = true })\n Logger.debug(`${LOG_PREFIX} Audio tracks re-enabled`)\n return true\n }\n // Connection exists but no stream (paused during connect) — start microphone\n if (this.connection) {\n this.state = STTState.Connected\n await this.startMicrophone()\n return true\n }\n // No connection and no stream — reset state so connect() doesn't bail out\n this.state = STTState.Disconnected\n }\n\n // No connection — need full connect\n Logger.debug(`${LOG_PREFIX} Initiating connection`)\n await this.connect()\n return true\n }\n\n // Metadata management\n public setChatMetadata(chatMetadata: PromptMetadata): void {\n this.options.promptMetadata = chatMetadata\n }\n\n // Private methods\n private async connect(): Promise<void> {\n if (this.state === STTState.Connected) {\n Logger.warn(`${LOG_PREFIX} Already connected`)\n return\n }\n\n if (this.state === STTState.Connecting) {\n Logger.warn(`${LOG_PREFIX} Connection already in progress`)\n return\n }\n\n this.state = STTState.Connecting\n\n try {\n const tokenData = await this.getToken()\n Logger.info(`${LOG_PREFIX} Connecting to Deepgram`)\n\n // CRITICAL: Must use { accessToken: token } format for temporary tokens\n // Passing raw string treats it as API key with wrong authorization scheme\n const deepgram = new DeepgramClient({\n accessToken: tokenData.token,\n baseUrl: tokenData.api_url\n })\n\n // Configure the live transcription connection\n // Don't specify encoding/sample_rate/channels - let Deepgram auto-detect 
from WebM/Opus\n // Note: v5 SDK requires booleans to be passed as strings (\"true\"/\"false\")\n const connectionOptions = {\n model: this.options.model,\n language: this.options.language,\n smart_format: String(this.options.smartFormat),\n interim_results: String(this.options.interimResults),\n utterance_end_ms: this.options.utteranceEndMs,\n vad_events: String(this.options.vadEvents),\n filler_words: String(this.options.fillerWords),\n endpointing: this.options.endpointing,\n // Always opt out of Deepgram's Model Improvement Program to prevent\n // customer audio data from being used for model training\n mip_opt_out: 'true',\n // Optional features — only included when explicitly enabled\n // keyterm (singular) is the v5 SDK parameter name for vocabulary boosting\n ...(this.options.keyterms && this.options.keyterms.length > 0 && { keyterm: this.options.keyterms }),\n // no_delay is not a named param in the v5 SDK, pass via queryParams\n ...(this.options.noDelay && { queryParams: { no_delay: 'true' } })\n }\n\n // v5: listen.v1.connect() returns a V1Socket wrapping a ReconnectingWebSocket.\n // The v5 SDK doesn't expose .v1 in its public TypeScript types — cast to the\n // local DeepgramV1Listen interface defined above.\n const v1Listen = deepgram.listen as unknown as DeepgramV1Listen\n this.connection = await v1Listen.v1.connect(connectionOptions)\n\n // Initiate the WebSocket and wait for it to open (with timeout).\n // V1Socket.on() stores only one handler per event (last wins), so no off() is needed —\n // setupEventHandlers() below will overwrite these one-time handlers.\n this.connection.connect()\n await Promise.race([\n this.connection.waitForOpen(),\n new Promise<void>((_, reject) =>\n setTimeout(() => reject(new Error('Connection timeout')), CONNECTION_TIMEOUT_MS)\n )\n ])\n\n // Don't overwrite Paused state (user may have paused during async connection).\n // TypeScript narrows this.state to Connecting after the await, so cast to bypass.\n if 
((this.state as STTState) !== STTState.Paused) {\n this.state = STTState.Connected\n }\n Logger.info(`${LOG_PREFIX} Connection opened`)\n\n // Now set up the persistent event handlers\n this.setupEventHandlers()\n\n // If user paused during async connection, stay paused — don't start microphone\n // Note: pause() can be called externally during the await, mutating this.state\n // to Paused. TypeScript narrows this.state to Connecting, so we cast to bypass.\n if ((this.state as STTState) === STTState.Paused) {\n Logger.info(`${LOG_PREFIX} Pause requested during connection — staying paused`)\n this.resetReconnectionState()\n return\n }\n\n // Start the microphone\n await this.startMicrophone()\n\n Logger.info(`${LOG_PREFIX} Connected successfully`)\n\n // Reset reconnection state on successful connection\n this.resetReconnectionState()\n } catch (error) {\n this.state = STTState.Disconnected\n Logger.error(`${LOG_PREFIX} Connection error`, Logger.serialiseError(error))\n\n // Emit a non-fatal transient error so the host can surface a \"reconnecting\"\n // indicator if it wants to.\n if (this.shouldReconnect) {\n this.emitTransientError(error)\n this.scheduleReconnect()\n }\n }\n }\n\n private async disconnect(): Promise<void> {\n if (this.state === STTState.Idle || (this.state === STTState.Disconnected && !this.connection)) {\n return\n }\n\n Logger.info(`${LOG_PREFIX} Disconnecting`)\n\n // Clear safety net timeout\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n\n try {\n this.stopMicrophone()\n\n if (this.connection) {\n this.connection.close()\n this.connection = null\n }\n } catch (error) {\n Logger.error(`${LOG_PREFIX} Disconnect error`, Logger.serialiseError(error))\n }\n\n // Reset accumulated transcript and speaking states\n this.resetAccumulatedState()\n this.resetSpeakingStates()\n\n this.state = STTState.Disconnected\n this.clientMsgSend(new EnableMicrophoneUpdatedMessage(false))\n }\n\n 
private scheduleReconnect(): void {\n if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {\n Logger.error(`${LOG_PREFIX} Max reconnection attempts (${MAX_RECONNECT_ATTEMPTS}) reached`)\n this.clientMsgSend(new SessionErrorMessage(\n `Unable to connect to speech recognition service after ${MAX_RECONNECT_ATTEMPTS} attempts`\n ))\n return\n }\n\n this.reconnectAttempts++\n Logger.info(\n `${LOG_PREFIX} Scheduling reconnection attempt ${this.reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS} ` +\n `in ${this.reconnectDelay}ms`\n )\n\n this.reconnectTimeoutId = setTimeout(() => {\n void this.connect()\n }, this.reconnectDelay)\n\n // Exponential backoff\n this.reconnectDelay = Math.min(\n this.reconnectDelay * RECONNECT_BACKOFF_MULTIPLIER,\n MAX_RECONNECT_DELAY_MS\n )\n }\n\n private resetReconnectionState(): void {\n this.reconnectAttempts = 0\n this.reconnectDelay = INITIAL_RECONNECT_DELAY_MS\n this.clearReconnectTimeout()\n }\n\n private clearReconnectTimeout(): void {\n if (this.reconnectTimeoutId) {\n clearTimeout(this.reconnectTimeoutId)\n this.reconnectTimeoutId = null\n }\n }\n\n private async getToken(): Promise<DeepgramTokenResponse> {\n // Construct token endpoint from connectionUrl, include model for API version selection\n const model = this.options.model || 'nova-3'\n const tokenEndpoint = `${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(model)}`\n\n const response = await fetch(tokenEndpoint, {\n method: 'GET',\n headers: {\n Authorization: `Bearer ${this.options.jwtToken}`,\n 'Content-Type': 'application/json'\n }\n })\n\n if (!response.ok) {\n throw new Error(`Token fetch failed: ${response.status} ${response.statusText}`)\n }\n\n return await response.json()\n }\n\n private async startMicrophone(): Promise<void> {\n try {\n Logger.info(`${LOG_PREFIX} Starting microphone`)\n\n // Stop any existing microphone/stream first to prevent orphaned MediaRecorders\n // that would send interleaved audio data to the 
same Deepgram connection\n this.stopMicrophone()\n\n // Get user media - let browser use defaults for sample rate/channels with Opus\n if (!navigator.mediaDevices?.getUserMedia) {\n throw new Error('Microphone access is not available in this context')\n }\n this.stream = await navigator.mediaDevices.getUserMedia({\n audio: {\n deviceId: this.options.microphoneDeviceId ? { exact: this.options.microphoneDeviceId } : undefined,\n echoCancellation: this.options.echoCancellation,\n noiseSuppression: this.options.noiseSuppression,\n autoGainControl: this.options.autoGainControl\n }\n })\n\n // Check if user paused during the getUserMedia await\n if ((this.state as STTState) === STTState.Paused) {\n Logger.info(`${LOG_PREFIX} Paused during getUserMedia — keeping stream but disabling tracks`)\n this.stream.getTracks().forEach((track) => { track.enabled = false })\n return\n }\n\n // Create MediaRecorder to capture audio with Opus codec for Deepgram\n // Deepgram requires Opus codec in WebM container\n this.microphone = new MediaRecorder(this.stream, {\n mimeType: 'audio/webm;codecs=opus',\n audioBitsPerSecond: AUDIO_BITS_PER_SECOND\n })\n\n this.microphone.ondataavailable = (event: BlobEvent) => {\n if (event.data.size > 0 && this.connection && this.state === STTState.Connected) {\n // Convert blob to ArrayBuffer and send to Deepgram\n event.data.arrayBuffer().then((arrayBuffer) => {\n this.connection?.sendMedia(arrayBuffer)\n }).catch((error) => {\n Logger.error(`${LOG_PREFIX} Error converting audio data`, Logger.serialiseError(error))\n })\n }\n }\n\n // Start recording in chunks\n this.microphone.start(AUDIO_CHUNK_MS)\n Logger.debug(`${LOG_PREFIX} Microphone started`)\n\n // Notify that microphone is enabled (but NOT that user is speaking yet)\n this.clientMsgSend(new EnableMicrophoneUpdatedMessage(true))\n } catch (error) {\n Logger.error(`${LOG_PREFIX} Microphone error`, Logger.serialiseError(error))\n this.clientMsgSend(new DeviceErrorMessage(new 
Error(JSON.stringify(error))))\n }\n }\n\n private stopMicrophone(): void {\n if (this.microphone && this.microphone.state === 'recording') {\n this.microphone.stop()\n this.microphone = null\n }\n\n if (this.stream) {\n this.stream.getTracks().forEach((track) => {\n track.stop()\n })\n this.stream = null\n }\n\n Logger.debug(`${LOG_PREFIX} Microphone stopped`)\n }\n\n private setupEventHandlers(): void {\n if (!this.connection) {\n return\n }\n\n // V1Socket.on() stores only one handler per event (last-write wins) and has no off().\n // Each call below replaces any earlier handler for that event, which is the intended behaviour.\n this.connection.on('open', () => {\n this.handleConnectionOpen()\n })\n\n // v5: all transcription messages are unified under a single 'message' event,\n // discriminated by data.type ('Results', 'UtteranceEnd', 'Metadata', etc.)\n this.connection.on('message', (data: unknown) => {\n if (data !== null && typeof data === 'object' && 'type' in data) {\n const typed = data as { type: string }\n if (typed.type === 'Results') {\n this.handleTranscript(data as unknown as LiveTranscriptionResponse)\n } else if (typed.type === 'UtteranceEnd') {\n // UtteranceEnd fires when Deepgram detects a gap in word timings.\n // Critical for noisy environments where speech_final may never fire.\n this.handleUtteranceEnd(data as unknown as UtteranceEndResponse)\n } else if (typed.type === 'Metadata') {\n Logger.debug(`${LOG_PREFIX} Metadata`, data)\n }\n }\n })\n\n this.connection.on('close', () => {\n this.handleConnectionClose()\n })\n\n this.connection.on('error', (error: unknown) => {\n Logger.error(`${LOG_PREFIX} WebSocket error event`, Logger.serialiseError(error))\n this.emitTransientError(error)\n })\n }\n\n private handleAppMessages(): void {\n this.options.messages.subscribe((msg) => {\n switch (msg.uneeqMessageType) {\n case UneeqMessageType.AvatarStartedSpeaking:\n this.digitalHumanSpeaking = true\n break\n\n case UneeqMessageType.PromptResult: 
{\n const promptResultMessage = msg as PromptResultMessage\n if (!promptResultMessage.promptResult.success) {\n // The prompt failed, the digital human is not speaking\n this.handleSpeakingEnd()\n }\n break\n }\n\n case UneeqMessageType.AvatarAnswer: {\n const answer = msg as AvatarAnswerMessage\n if (answer.answerSpeech.replace(/<[^>]*>/g, '') === '') {\n // The response contained nothing to speak\n this.handleSpeakingEnd()\n }\n break\n }\n\n case UneeqMessageType.AvatarStoppedSpeaking: {\n this.handleSpeakingEnd()\n break\n }\n\n case UneeqMessageType.SessionEnded: {\n this.shouldReconnect = false\n void this.stopRecognition()\n break\n }\n\n case UneeqMessageType.SessionReconnecting: {\n this.handleSpeakingEnd()\n this.shouldReconnect = false\n void this.stopRecognition()\n break\n }\n\n case UneeqMessageType.CustomMetadataUpdated: {\n this.options.promptMetadata = (msg as CustomMetadataUpdated).chatMetadata\n break\n }\n\n case UneeqMessageType.SessionBackendError: {\n this.handleSpeakingEnd()\n break\n }\n\n default:\n }\n })\n }\n\n /**\n * Primary handler for incoming Deepgram transcript events.\n * Extracts transcript data, processes chunks, and updates speaking states.\n * @param data - The live transcription response from Deepgram\n */\n private handleTranscript(data: LiveTranscriptionResponse): void {\n try {\n this.lastDeepgramEventTime = Date.now()\n\n const channel = data.channel\n if (!channel?.alternatives || channel.alternatives.length === 0) {\n return\n }\n\n const alternative = channel.alternatives[0]\n const transcript: string = String(alternative.transcript || '')\n const isFinal = data.is_final\n const speechFinal = data.speech_final\n\n // Detailed debug logging for tracking transcript flow\n Logger.debug(`${LOG_PREFIX} Transcript event: is_final=${isFinal}, speech_final=${speechFinal}, ` +\n `transcript=\"${transcript.substring(0, 50)}${transcript.length > 50 ? '...' 
: ''}\", ` +\n `accumulated=\"${this.accumulatedTranscript.substring(0, 30)}${this.accumulatedTranscript.length > 30 ? '...' : ''}\", ` +\n `uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`)\n\n // Logic for empty transcript with speech_final\n if (transcript === '' && speechFinal) {\n if (this.endsWithPunctuation(this.accumulatedTranscript)) {\n this.sendAccumulatedTranscript(AccumulatedTranscriptSource.FinalTranscript)\n this.resetSpeakingStates()\n } else {\n Logger.debug(`${LOG_PREFIX} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`)\n }\n return\n }\n\n if (transcript === '') {\n return\n }\n\n this.processTranscriptChunk(transcript, alternative.confidence, isFinal, speechFinal)\n this.updateSpeakingAndInterruptionState(transcript, speechFinal)\n this.emitTranscriptionResult(transcript, alternative.confidence, speechFinal)\n\n this.resetSafetyNetTimeout()\n } catch (error) {\n Logger.error(`${LOG_PREFIX} Error processing transcript`, Logger.serialiseError(error))\n }\n }\n\n /**\n * Processes a single transcript chunk, accumulating text and confidence scores.\n * @param transcript - The transcript text from the current chunk\n * @param confidence - The confidence score for the current chunk\n * @param isFinal - Whether Deepgram has marked this chunk as final\n * @param speechFinal - Whether Deepgram has detected the end of a speech segment\n */\n private processTranscriptChunk(transcript: string, confidence: number, isFinal: boolean, speechFinal: boolean): void {\n if (isFinal && !speechFinal) {\n if (this.accumulatedTranscript !== '') {\n this.accumulatedTranscript += ' '\n }\n this.accumulatedTranscript += transcript\n\n const chunkWordCount = transcript.trim().split(/\\s+/).length\n const chunkConfidence = confidence ?? 
1.0\n this.accumulatedConfidenceSum += chunkConfidence * chunkWordCount\n this.accumulatedWordCount += chunkWordCount\n\n Logger.debug(`${LOG_PREFIX} Accumulated transcript: \"${this.accumulatedTranscript}\"`)\n }\n }\n\n /**\n * Evaluates whether the user's speech should trigger an interruption of the digital human\n * and updates the internal speaking states for Renny and the UI.\n * @param transcript - The current transcript text\n * @param speechFinal - Whether the current segment is marked as speech_final\n */\n private updateSpeakingAndInterruptionState(transcript: string, speechFinal: boolean): void {\n const effectiveTranscript = this.accumulatedTranscript !== '' ? this.accumulatedTranscript : transcript\n const wordCount = effectiveTranscript.trim().split(/\\s+/).length\n const threshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n\n if (!this.isUiShowingSpeaking) {\n this.isUiShowingSpeaking = true\n this.clientMsgSend(new UserStartedSpeakingMessage())\n }\n\n const willTriggerAction = !this.digitalHumanSpeaking || wordCount >= threshold\n\n const fullTranscript = this.accumulatedTranscript !== ''\n ? 
this.accumulatedTranscript + ' ' + transcript\n : transcript\n const speechLooksLikeFinal = speechFinal && this.endsWithPunctuation(fullTranscript)\n\n if (!this.isUserCurrentlySpeaking && willTriggerAction) {\n this.isUserCurrentlySpeaking = true\n this.dataChannelMsgSend(new UserSpeaking(UserSpeakingState.Start))\n }\n\n if (this.digitalHumanSpeaking && wordCount >= threshold) {\n Logger.debug(`${LOG_PREFIX} Interrupting digital human`)\n this.dataChannelMsgSend(new StopSpeaking())\n this.clientMsgSend(new AvatarInterruptedMessage())\n this.digitalHumanSpeaking = false\n }\n\n if (this.isUserCurrentlySpeaking && speechLooksLikeFinal) {\n this.isUserCurrentlySpeaking = false\n this.dataChannelMsgSend(new UserSpeaking(UserSpeakingState.Stop))\n }\n\n if (this.isUiShowingSpeaking && speechLooksLikeFinal) {\n this.isUiShowingSpeaking = false\n this.clientMsgSend(new UserStoppedSpeakingMessage())\n }\n }\n\n /**\n * Emits transcription results to the client (for closed captions) and sends\n * chat prompts to Renny if the utterance has reached a final state.\n * @param transcript - The current segment transcript\n * @param confidence - The confidence score for the current segment\n * @param speechFinal - Whether the segment is speech_final\n */\n private emitTranscriptionResult(transcript: string, confidence: number, speechFinal: boolean): void {\n const fullTranscript = this.accumulatedTranscript !== ''\n ? this.accumulatedTranscript + ' ' + transcript\n : transcript\n const speechLooksLikeFinal = speechFinal && this.endsWithPunctuation(fullTranscript)\n\n const result: SpeechTranscriptionResult = {\n transcript: speechFinal ? fullTranscript : transcript,\n final: speechLooksLikeFinal,\n confidence: confidence ?? 
1.0,\n language_code: this.options.language || ''\n }\n\n // Emit the transcription message (for closed captions)\n this.clientMsgSend(new SpeechTranscriptionMessage(result))\n\n if (speechLooksLikeFinal) {\n const fullWordCount = fullTranscript.trim().split(/\\s+/).length\n const threshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n if (this.digitalHumanSpeaking && fullWordCount < threshold) {\n Logger.debug(`${LOG_PREFIX} Discarding utterance during speaking (${fullWordCount} words < ${threshold} threshold): \"${fullTranscript}\"`)\n } else {\n Logger.info(`${LOG_PREFIX} Final transcript: \"${fullTranscript}\"`)\n this.sendChatPrompt(fullTranscript)\n }\n this.resetAccumulatedState()\n } else if (speechFinal) {\n if (this.accumulatedTranscript !== '') {\n this.accumulatedTranscript += ' '\n }\n this.accumulatedTranscript += transcript\n\n const chunkWordCount = transcript.trim().split(/\\s+/).length\n this.accumulatedConfidenceSum += (confidence ?? 1.0) * chunkWordCount\n this.accumulatedWordCount += chunkWordCount\n\n Logger.debug(`${LOG_PREFIX} speech_final without punctuation, accumulated for safety net: \"${this.accumulatedTranscript}\"`)\n }\n }\n\n /**\n * Handle UtteranceEnd event from Deepgram.\n * This fires when Deepgram detects a gap in word timings, even in noisy environments\n * where speech_final may never fire due to VAD detecting background noise as \"audio activity\".\n */\n private handleUtteranceEnd(data: UtteranceEndResponse): void {\n this.lastDeepgramEventTime = Date.now()\n\n Logger.debug(`${LOG_PREFIX} UtteranceEnd event received: last_word_end=${data?.last_word_end}, ` +\n `accumulated=\"${this.accumulatedTranscript.substring(0, 50)}...\", ` +\n `uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`)\n\n // If we have accumulated transcript that wasn't sent via speech_final, check if we should send it\n if (this.accumulatedTranscript.trim() !== '') {\n if 
(this.endsWithPunctuation(this.accumulatedTranscript)) {\n // Punctuation detected - user appears to be done speaking\n // Clear safety net and send the transcript\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n Logger.debug(`${LOG_PREFIX} UtteranceEnd fallback triggered with transcript: \"${this.accumulatedTranscript}\"`)\n this.sendAccumulatedTranscript(AccumulatedTranscriptSource.UtteranceEndFallback)\n this.resetSpeakingStates()\n Logger.debug(`${LOG_PREFIX} UtteranceEnd: reset speaking states`)\n } else {\n // No punctuation - user might still be mid-utterance (just pausing)\n // DON'T clear safety net - let it fire after timeout if no more speech arrives\n // DON'T reset speaking states - user is still mid-utterance\n Logger.debug(`${LOG_PREFIX} UtteranceEnd: no punctuation, waiting for more speech or safety net`)\n }\n } else {\n // No accumulated transcript - nothing to send\n // Clear safety net and reset states\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n this.resetSpeakingStates()\n Logger.debug(`${LOG_PREFIX} UtteranceEnd: no transcript, reset speaking states`)\n }\n }\n\n /**\n * Reset the safety net timeout. 
Called whenever we receive a Deepgram event.\n * If we're in a speaking state and have accumulated transcript, start a new timeout.\n */\n private resetSafetyNetTimeout(): void {\n // Clear any existing timeout\n if (this.safetyNetTimeoutId) {\n clearTimeout(this.safetyNetTimeoutId)\n this.safetyNetTimeoutId = null\n }\n\n // Only set timeout if we're in a speaking state with accumulated transcript\n if ((this.isUiShowingSpeaking || this.isUserCurrentlySpeaking) && this.accumulatedTranscript.trim() !== '') {\n this.safetyNetTimeoutId = setTimeout(() => {\n this.triggerSafetyNet()\n }, this.options.safetyNetTimeoutMs)\n }\n }\n\n /**\n * Safety net trigger - called when no Deepgram events received for safetyNetTimeoutMs\n * while in a speaking state with accumulated transcript. This handles truly stuck states.\n */\n private triggerSafetyNet(): void {\n Logger.warn(`${LOG_PREFIX} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`)\n Logger.debug(`${LOG_PREFIX} Safety net triggered: accumulated=\"${this.accumulatedTranscript}\", ` +\n `uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, ` +\n `timeSinceLastEvent=${Date.now() - this.lastDeepgramEventTime}ms`)\n\n this.sendAccumulatedTranscript(AccumulatedTranscriptSource.SafetyNet)\n this.resetSpeakingStates()\n this.safetyNetTimeoutId = null\n }\n\n /**\n * Send the accumulated transcript as a final result.\n * Handles emitting SpeechTranscriptionMessage, gating by word threshold, and sending ChatPrompt.\n * @param source - Description of what triggered this (for logging)\n */\n private sendAccumulatedTranscript(source: AccumulatedTranscriptSource): void {\n if (this.accumulatedTranscript.trim() === '') {\n return\n }\n\n if (source !== AccumulatedTranscriptSource.SafetyNet && !this.endsWithPunctuation(this.accumulatedTranscript)) {\n Logger.debug(`${LOG_PREFIX} sendAccumulatedTranscript: not sending as source is ${source} and transcript 
doesn't end in punctuation: \"${this.accumulatedTranscript}\"`)\n return\n }\n\n const threshold = this.options.interruptionWordThreshold ?? INTERRUPTION_WORD_THRESHOLD\n\n // Calculate weighted average confidence from accumulated chunks\n const avgConfidence = this.accumulatedWordCount > 0\n ? this.accumulatedConfidenceSum / this.accumulatedWordCount\n : 1.0\n\n // Emit final transcription message for closed captions\n const result: SpeechTranscriptionResult = {\n transcript: this.accumulatedTranscript,\n final: true,\n confidence: avgConfidence,\n language_code: this.options.language || ''\n }\n this.clientMsgSend(new SpeechTranscriptionMessage(result))\n\n // Gate chat prompts during digital human speaking by word threshold\n const wordCount = this.accumulatedTranscript.trim().split(/\\s+/).length\n if (this.digitalHumanSpeaking && wordCount < threshold) {\n Logger.debug(`${LOG_PREFIX} Discarding utterance during speaking (${wordCount} words < ${threshold} threshold): \"${this.accumulatedTranscript}\"`)\n } else {\n Logger.info(`${LOG_PREFIX} ${source}: \"${this.accumulatedTranscript}\"`)\n this.sendChatPrompt(this.accumulatedTranscript)\n }\n\n // Reset accumulated state\n this.resetAccumulatedState()\n }\n\n /**\n * Check if a string ends with sentence-ending punctuation.\n * @param text - The text to check\n * @returns true if the text ends with punctuation (. ! ? 
etc.)\n */\n private endsWithPunctuation(text: string): boolean {\n const trimmed = text.trim()\n if (trimmed.length === 0) {\n return false\n }\n // Match common sentence-ending punctuation marks\n return /[.!?;:]$/.test(trimmed)\n }\n\n /**\n * Reset accumulated transcript and confidence state.\n */\n private resetAccumulatedState(): void {\n this.accumulatedTranscript = ''\n this.accumulatedConfidenceSum = 0\n this.accumulatedWordCount = 0\n }\n\n /**\n * Reset speaking states and send appropriate stop messages.\n */\n private resetSpeakingStates(): void {\n if (this.isUserCurrentlySpeaking) {\n this.isUserCurrentlySpeaking = false\n this.dataChannelMsgSend(new UserSpeaking(UserSpeakingState.Stop))\n }\n\n if (this.isUiShowingSpeaking) {\n this.isUiShowingSpeaking = false\n this.clientMsgSend(new UserStoppedSpeakingMessage())\n }\n }\n\n private handleConnectionOpen(): void {\n // Don't overwrite Paused state (user may have paused during async connection)\n if (this.state !== STTState.Paused) {\n this.state = STTState.Connected\n }\n }\n\n private handleConnectionClose(): void {\n Logger.info(`${LOG_PREFIX} Connection closed`)\n\n // If user has paused, don't overwrite Paused state and don't reconnect.\n // The connection closing while paused is expected (Deepgram closes idle connections).\n // resume() will reconnect when the user unmutes.\n if (this.state === STTState.Paused) {\n Logger.info(`${LOG_PREFIX} Connection closed while paused — will reconnect on resume`)\n this.connection = null\n this.stopMicrophone()\n return\n }\n\n this.state = STTState.Disconnected\n this.clientMsgSend(new EnableMicrophoneUpdatedMessage(false))\n\n // Attempt reconnection with exponential backoff\n if (this.shouldReconnect) {\n Logger.info(`${LOG_PREFIX} Unexpected disconnect, attempting reconnection...`)\n this.scheduleReconnect()\n }\n }\n\n private sendChatPrompt(transcript: string): void {\n if (transcript && transcript.trim() !== '') {\n // Set the user's spoken locale if 
detected\n if (this.options.language) {\n this.options.promptMetadata.userSpokenLocale = this.options.language\n }\n\n this.dataChannelMsgSend(new ChatPrompt(transcript, this.options.promptMetadata))\n }\n }\n\n private handleSpeakingEnd(): void {\n this.digitalHumanSpeaking = false\n }\n\n /**\n * Emit a non-fatal transient error to the host. Used when a single\n * connect/reconnect attempt fails or a recoverable WebSocket error fires.\n * The reconnect machinery (scheduleReconnect) will continue retrying; the\n * host receives this as an informational signal, not a fatal one.\n * Compare with the fatal `SessionErrorMessage` emitted from\n * scheduleReconnect() only when MAX_RECONNECT_ATTEMPTS is exhausted.\n */\n private emitTransientError(error: unknown): void {\n const message = error instanceof Error ? error.message : String(error)\n this.clientMsgSend(new SpeechRecognitionTransientErrorMessage(message))\n }\n\n // Send a message on the data channel to renderer\n private dataChannelMsgSend(msg: DataChannelMessage): void {\n this.options.sendMessage(msg)\n }\n\n // Send a message to the client implementation, i.e. 
hosted experience\n private clientMsgSend(msg: UneeqMessage): void {\n this.options.messages.next(msg)\n }\n}\n"],"names":["LOG_PREFIX","AccumulatedTranscriptSource","STTState","DeepgramSTT","options","connection","state","Idle","shouldReconnect","microphone","stream","reconnectAttempts","reconnectDelay","reconnectTimeoutId","digitalHumanSpeaking","isUserCurrentlySpeaking","isUiShowingSpeaking","accumulatedTranscript","accumulatedConfidenceSum","accumulatedWordCount","lastDeepgramEventTime","safetyNetTimeoutId","constructor","this","model","language","smartFormat","interimResults","utteranceEndMs","vadEvents","fillerWords","endpointing","echoCancellation","noiseSuppression","autoGainControl","interruptionWordThreshold","noDelay","safetyNetTimeoutMs","warn","info","handleAppMessages","startRecognition","resetReconnectionState","connect","stopRecognition","clearReconnectTimeout","disconnect","pause","Paused","clearTimeout","resetAccumulatedState","resetSpeakingStates","getTracks","forEach","track","enabled","debug","resume","Connected","startMicrophone","Disconnected","setChatMetadata","chatMetadata","promptMetadata","Connecting","tokenData","getToken","deepgram","accessToken","token","baseUrl","api_url","connectionOptions","smart_format","String","interim_results","utterance_end_ms","vad_events","filler_words","mip_opt_out","keyterms","length","keyterm","queryParams","no_delay","v1Listen","listen","v1","Promise","race","waitForOpen","_","reject","setTimeout","Error","setupEventHandlers","error","serialiseError","emitTransientError","scheduleReconnect","stopMicrophone","close","clientMsgSend","Math","min","tokenEndpoint","connectionUrl","encodeURIComponent","response","fetch","method","headers","Authorization","jwtToken","ok","status","statusText","json","navigator","mediaDevices","getUserMedia","audio","deviceId","microphoneDeviceId","exact","undefined","MediaRecorder","mimeType","audioBitsPerSecond","ondataavailable","event","data","size","arrayBuffer","then","sendM
edia","catch","start","JSON","stringify","stop","on","handleConnectionOpen","typed","type","handleTranscript","handleUtteranceEnd","handleConnectionClose","messages","subscribe","msg","uneeqMessageType","AvatarStartedSpeaking","PromptResult","promptResult","success","handleSpeakingEnd","AvatarAnswer","answerSpeech","replace","AvatarStoppedSpeaking","SessionEnded","SessionReconnecting","CustomMetadataUpdated","SessionBackendError","Date","now","channel","alternatives","alternative","transcript","isFinal","is_final","speechFinal","speech_final","substring","endsWithPunctuation","sendAccumulatedTranscript","FinalTranscript","processTranscriptChunk","confidence","updateSpeakingAndInterruptionState","emitTranscriptionResult","resetSafetyNetTimeout","chunkWordCount","trim","split","chunkConfidence","wordCount","threshold","willTriggerAction","fullTranscript","speechLooksLikeFinal","dataChannelMsgSend","Start","Stop","result","final","language_code","fullWordCount","sendChatPrompt","last_word_end","UtteranceEndFallback","triggerSafetyNet","SafetyNet","source","avgConfidence","text","trimmed","test","userSpokenLocale","message","sendMessage","next"],"sourceRoot":""}
package/dist/363.index.js CHANGED
@@ -1,2 +1,2 @@
1
- "use strict";(Object("undefined"!=typeof self?self:this).webpackChunkUneeq=Object("undefined"!=typeof self?self:this).webpackChunkUneeq||[]).push([[363],{363(e,t,n){n.d(t,{DeepgramFluxSTT:()=>u});var i=n(514),s=n(838),o=n(33),r=n(388),a=n(58),c=n(260);const h="[Deepgram Flux STT]";var d;!function(e){e.Idle="Idle",e.Connecting="Connecting",e.Connected="Connected",e.Paused="Paused",e.Disconnected="Disconnected"}(d||(d={}));class u{options;connection=null;state=d.Idle;shouldReconnect=!0;stream=null;audioContext=null;workletNode=null;reconnectAttempts=0;reconnectDelay=1e3;reconnectTimeoutId=null;digitalHumanSpeaking=!1;isUserCurrentlySpeaking=!1;isUiShowingSpeaking=!1;eagerPromptSentForTurn=!1;safetyNetTimeoutId=null;audioChunksSent=0;constructor(e){this.options=e,this.options.model=this.options.model||"flux-general-en",this.options.language=this.options.language||"en",this.options.eotThreshold=this.options.eotThreshold??.7,this.options.useEagerEndOfTurn=this.options.useEagerEndOfTurn??!0,this.options.safetyNetTimeoutMs=this.options.safetyNetTimeoutMs??2e3,this.options.echoCancellation=this.options.echoCancellation??!0,this.options.noiseSuppression=this.options.noiseSuppression??!0,this.options.autoGainControl=this.options.autoGainControl??!0,this.handleAppMessages()}async startRecognition(){i.A.info(`${h} Starting speech recognition`),this.shouldReconnect=!0,this.resetReconnectionState(),await this.connect()}async stopRecognition(){i.A.info(`${h} Stopping speech recognition`),this.shouldReconnect=!1,this.clearReconnectTimeout(),await this.disconnect()}async pause(){return i.A.info(`${h} Pausing speech recognition`),this.state=d.Paused,this.clearSafetyNet(),this.resetSpeakingStates(),this.eagerPromptSentForTurn=!1,this.stream&&(this.stream.getTracks().forEach(e=>{e.enabled=!1}),i.A.debug(`${h} Audio tracks disabled`)),!0}async resume(){if(i.A.info(`${h} Resuming speech recognition`),this.state===d.Paused){if(this.stream)return 
this.state=d.Connected,this.stream.getTracks().forEach(e=>{e.enabled=!0}),i.A.debug(`${h} Audio tracks re-enabled`),!0;if(this.connection)return this.state=d.Connected,await this.startMicrophone(),!0;this.state=d.Disconnected}return i.A.debug(`${h} Initiating connection`),await this.connect(),!0}setChatMetadata(e){this.options.promptMetadata=e}async connect(){if(this.state!==d.Connected)if(this.state!==d.Connecting){this.state=d.Connecting;try{const e=await this.getToken();i.A.info(`${h} Connecting to Deepgram Flux v2 — api_url="${e.api_url}", sdk_version="${e.sdk_version}", token_length=${e.token?.length??0}`);const t=new c.c({accessToken:e.token,baseUrl:e.api_url}),n={model:this.options.model,encoding:"linear16",sample_rate:String(16e3),mip_opt_out:"true",...void 0!==this.options.eotThreshold&&{eot_threshold:this.options.eotThreshold},...void 0!==this.options.eagerEotThreshold&&{eager_eot_threshold:this.options.eagerEotThreshold},...void 0!==this.options.eotTimeoutMs&&{eot_timeout_ms:this.options.eotTimeoutMs},...this.options.keyterms&&this.options.keyterms.length>0&&{keyterm:this.options.keyterms}};if(this.connection=await t.listen.v2.connect(n),this.connection.connect(),await Promise.race([this.connection.waitForOpen(),new Promise((e,t)=>setTimeout(()=>t(new Error("Connection timeout")),1e4))]),this.state!==d.Paused&&(this.state=d.Connected),i.A.info(`${h} Connection opened`),this.setupEventHandlers(),this.state===d.Paused)return i.A.info(`${h} Pause requested during connection — staying paused`),void this.resetReconnectionState();await this.startMicrophone(),i.A.info(`${h} Connected successfully`),this.resetReconnectionState()}catch(e){this.state=d.Disconnected,i.A.error(`${h} Connection error`,i.A.serialiseError(e)),this.handleError(e),this.shouldReconnect&&this.scheduleReconnect()}}else i.A.warn(`${h} Connection already in progress`);else i.A.warn(`${h} Already connected`)}async 
disconnect(){if(this.state!==d.Idle&&(this.state!==d.Disconnected||this.connection)){i.A.info(`${h} Disconnecting`);try{if(this.stopMicrophone(),this.connection){try{this.connection.sendCloseStream({type:"CloseStream"})}catch{}this.connection.close(),this.connection=null}}catch(e){i.A.error(`${h} Disconnect error`,i.A.serialiseError(e))}this.clearSafetyNet(),this.resetSpeakingStates(),this.eagerPromptSentForTurn=!1,this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1))}}scheduleReconnect(){if(this.reconnectAttempts>=5)return i.A.error(`${h} Max reconnection attempts (5) reached`),void this.clientMsgSend(new s.Cj("Unable to connect to speech recognition service after 5 attempts"));this.reconnectAttempts++,i.A.info(`${h} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`),this.reconnectTimeoutId=setTimeout(()=>{this.connect()},this.reconnectDelay),this.reconnectDelay=Math.min(2*this.reconnectDelay,3e4)}resetReconnectionState(){this.reconnectAttempts=0,this.reconnectDelay=1e3,this.clearReconnectTimeout()}clearReconnectTimeout(){this.reconnectTimeoutId&&(clearTimeout(this.reconnectTimeoutId),this.reconnectTimeoutId=null)}async getToken(){const e=this.options.model||"flux-general-en",t=`${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(e)}`,n=await fetch(t,{method:"GET",headers:{Authorization:`Bearer ${this.options.jwtToken}`,"Content-Type":"application/json"}});if(!n.ok)throw new Error(`Token fetch failed: ${n.status} ${n.statusText}`);return await n.json()}async startMicrophone(){try{if(i.A.info(`${h} Starting microphone`),this.stopMicrophone(),this.stream=await navigator.mediaDevices.getUserMedia({audio:{deviceId:this.options.microphoneDeviceId?{exact:this.options.microphoneDeviceId}:void 0,echoCancellation:this.options.echoCancellation,noiseSuppression:this.options.noiseSuppression,autoGainControl:this.options.autoGainControl}}),this.state===d.Paused)return i.A.info(`${h} 
Paused during getUserMedia — keeping stream but disabling tracks`),void this.stream.getTracks().forEach(e=>{e.enabled=!1});this.audioContext=new AudioContext({sampleRate:16e3});const e=this.audioContext.createMediaStreamSource(this.stream),t=new Blob(["\nclass PcmCaptureProcessor extends AudioWorkletProcessor {\n constructor() {\n super()\n this._buffer = new Float32Array(1280)\n this._offset = 0\n }\n process(inputs, outputs, parameters) {\n const input = inputs[0]?.[0]\n if (!input) return true\n for (let i = 0; i < input.length; i++) {\n this._buffer[this._offset++] = input[i]\n if (this._offset >= this._buffer.length) {\n const int16 = new Int16Array(this._buffer.length)\n for (let j = 0; j < this._buffer.length; j++) {\n const s = Math.max(-1, Math.min(1, this._buffer[j]))\n int16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF\n }\n this.port.postMessage(int16.buffer, [int16.buffer])\n this._offset = 0\n }\n }\n return true\n }\n}\nregisterProcessor('pcm-capture-processor', PcmCaptureProcessor)\n"],{type:"application/javascript"}),n=URL.createObjectURL(t);await this.audioContext.audioWorklet.addModule(n),URL.revokeObjectURL(n),this.workletNode=new AudioWorkletNode(this.audioContext,"pcm-capture-processor"),this.audioChunksSent=0,this.workletNode.port.onmessage=e=>{this.connection&&this.state===d.Connected&&(this.connection.sendMedia(e.data),this.audioChunksSent++,this.audioChunksSent%50==1&&i.A.info(`${h} Audio chunks sent: ${this.audioChunksSent}, size: ${e.data.byteLength} bytes`))},e.connect(this.workletNode),this.workletNode.connect(this.audioContext.destination),i.A.info(`${h} Microphone started (linear16 PCM @ 16000Hz)`),this.clientMsgSend(new s.WY(!0))}catch(e){i.A.error(`${h} Microphone error`,i.A.serialiseError(e)),this.clientMsgSend(new s.co(new 
Error(JSON.stringify(e))))}}stopMicrophone(){this.workletNode&&(this.workletNode.port.close(),this.workletNode.disconnect(),this.workletNode=null),this.audioContext&&(this.audioContext.close().catch(()=>{}),this.audioContext=null),this.stream&&(this.stream.getTracks().forEach(e=>{e.stop()}),this.stream=null),i.A.info(`${h} Microphone stopped`)}setupEventHandlers(){this.connection&&(this.connection.on("open",()=>{this.handleConnectionOpen()}),this.connection.on("message",e=>{if(null!==e&&"object"==typeof e&&"type"in e){const t=e;"TurnInfo"===t.type?this.handleTurnInfo(e):"Connected"===t.type?i.A.info(`${h} v2 connection confirmed`):"Error"===t.type?this.handleFatalError(e):i.A.debug(`${h} Unhandled v2 message type: ${t.type}`)}}),this.connection.on("close",e=>{this.handleConnectionClose(e)}),this.connection.on("error",e=>{this.handleError(e)}))}handleTurnInfo(e){try{switch(i.A.debug(`${h} TurnInfo event: ${e.event}, transcript="${(e.transcript||"").substring(0,50)}${(e.transcript||"").length>50?"...":""}", turn_index=${e.turn_index}, eot_confidence=${e.end_of_turn_confidence??"n/a"}`),e.event){case"StartOfTurn":this.handleStartOfTurn(e);break;case"Update":this.handleUpdate(e);break;case"EagerEndOfTurn":this.handleEagerEndOfTurn(e);break;case"TurnResumed":this.handleTurnResumed(e);break;case"EndOfTurn":this.handleEndOfTurn(e);break;default:i.A.debug(`${h} Unknown TurnInfo event: ${e.event}`)}}catch(e){i.A.error(`${h} Error processing TurnInfo`,i.A.serialiseError(e))}finally{this.resetSafetyNet()}}handleStartOfTurn(e){i.A.debug(`${h} StartOfTurn: turn_index=${e.turn_index}`),this.eagerPromptSentForTurn=!1}handleUpdate(e){const t=e.transcript||"";if(""===t)return;this.isUiShowingSpeaking||(this.isUiShowingSpeaking=!0,this.clientMsgSend(new s._4)),this.isUserCurrentlySpeaking||(this.isUserCurrentlySpeaking=!0,this.dataChannelMsgSend(new o.A(o.f.Start))),this.digitalHumanSpeaking&&(i.A.info(`${h} User speech detected during avatar speaking — 
interrupting`),this.dataChannelMsgSend(new a.f),this.clientMsgSend(new s.tc),this.digitalHumanSpeaking=!1);const n={transcript:t,final:!1,confidence:this.calculateWordConfidence(e.words),language_code:this.options.language||""};this.clientMsgSend(new s.Ux(n))}handleEagerEndOfTurn(e){const t=e.transcript||"";i.A.debug(`${h} EagerEndOfTurn: confidence=${e.end_of_turn_confidence}, transcript="${t}"`),this.options.useEagerEndOfTurn?""!==t.trim()&&(this.digitalHumanSpeaking?i.A.debug(`${h} EagerEndOfTurn ignored — avatar is speaking`):(i.A.info(`${h} EagerEndOfTurn: sending prompt early: "${t}"`),this.eagerPromptSentForTurn=!0,this.sendChatPrompt(t))):i.A.debug(`${h} EagerEndOfTurn ignored (useEagerEndOfTurn is disabled)`)}handleTurnResumed(e){i.A.debug(`${h} TurnResumed: turn_index=${e.turn_index}`),this.eagerPromptSentForTurn&&(i.A.warn(`${h} TurnResumed after eager prompt was already sent — cancelling via StopSpeaking`),this.dataChannelMsgSend(new a.f)),this.eagerPromptSentForTurn=!1}handleEndOfTurn(e){const t=e.transcript||"";if(i.A.info(`${h} EndOfTurn: "${t}", confidence=${e.end_of_turn_confidence}`),""!==t.trim()){const n={transcript:t,final:!0,confidence:this.calculateWordConfidence(e.words),language_code:this.options.language||""};this.clientMsgSend(new s.Ux(n)),this.digitalHumanSpeaking?i.A.warn(`${h} EndOfTurn: discarding prompt — avatar is still speaking (no Update interrupted)`):(this.eagerPromptSentForTurn&&i.A.debug(`${h} EndOfTurn: cancelling in-flight eager prompt`),this.dataChannelMsgSend(new a.f),this.sendChatPrompt(t))}this.eagerPromptSentForTurn=!1,this.resetSpeakingStates()}handleFatalError(e){i.A.error(`${h} Fatal error from Deepgram: ${e.code} — ${e.description}`),this.clientMsgSend(new s.Cj(`Deepgram error: ${e.code} — ${e.description}`))}calculateWordConfidence(e){return e&&0!==e.length?e.reduce((e,t)=>e+t.confidence,0)/e.length:1}handleAppMessages(){this.options.messages.subscribe(e=>{switch(e.uneeqMessageType){case 
s.Yg.AvatarStartedSpeaking:this.digitalHumanSpeaking=!0;break;case s.Yg.PromptResult:e.promptResult.success||this.handleSpeakingEnd();break;case s.Yg.AvatarAnswer:""===e.answerSpeech.replace(/<[^>]*>/g,"")&&this.handleSpeakingEnd();break;case s.Yg.AvatarStoppedSpeaking:this.handleSpeakingEnd();break;case s.Yg.SessionEnded:this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.SessionReconnecting:this.handleSpeakingEnd(),this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.CustomMetadataUpdated:this.options.promptMetadata=e.chatMetadata;break;case s.Yg.SessionBackendError:this.handleSpeakingEnd()}})}handleConnectionOpen(){this.state!==d.Paused&&(this.state=d.Connected)}handleConnectionClose(e){const t=e?.code??"unknown",n=e?.reason??"";if(i.A.info(`${h} Connection closed — code=${t}, reason="${n}"`),this.state===d.Paused)return i.A.info(`${h} Connection closed while paused — will reconnect on resume`),this.connection=null,this.stopMicrophone(),this.clearSafetyNet(),void this.resetSpeakingStates();this.state=d.Disconnected,this.clearSafetyNet(),this.resetSpeakingStates(),this.eagerPromptSentForTurn=!1,this.clientMsgSend(new s.WY(!1)),this.shouldReconnect&&(i.A.info(`${h} Unexpected disconnect, attempting reconnection...`),this.scheduleReconnect())}handleError(e){const t={};e instanceof Event&&(t.type=e.type,t.target=e.target?.url??e.target?.readyState??"unknown"),i.A.error(`${h} Error occurred`,e,t);const n=e instanceof Error?e.message:String(e);n.includes("microphone")||n.includes("permission")||n.includes("getUserMedia")?this.clientMsgSend(new s.co(new Error(n))):this.clientMsgSend(new s.Cj(n))}sendChatPrompt(e){e&&""!==e.trim()&&(this.options.language&&(this.options.promptMetadata.userSpokenLocale=this.options.language),this.dataChannelMsgSend(new 
r.D(e,this.options.promptMetadata)))}handleSpeakingEnd(){this.digitalHumanSpeaking=!1}resetSafetyNet(){this.clearSafetyNet(),(this.isUiShowingSpeaking||this.isUserCurrentlySpeaking)&&(this.safetyNetTimeoutId=setTimeout(()=>{i.A.warn(`${h} Safety net: no TurnInfo events for ${this.options.safetyNetTimeoutMs}ms while speaking — resetting`),this.resetSpeakingStates()},this.options.safetyNetTimeoutMs))}clearSafetyNet(){this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null)}resetSpeakingStates(){this.isUserCurrentlySpeaking&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new o.A(o.f.Stop))),this.isUiShowingSpeaking&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}dataChannelMsgSend(e){this.options.sendMessage(e)}clientMsgSend(e){this.options.messages.next(e)}}}}]);
1
+ "use strict";(Object("undefined"!=typeof self?self:this).webpackChunkUneeq=Object("undefined"!=typeof self?self:this).webpackChunkUneeq||[]).push([[363],{363(e,t,n){n.d(t,{DeepgramFluxSTT:()=>u});var i=n(514),s=n(838),o=n(33),r=n(388),a=n(58),c=n(260);const h="[Deepgram Flux STT]";var d;!function(e){e.Idle="Idle",e.Connecting="Connecting",e.Connected="Connected",e.Paused="Paused",e.Disconnected="Disconnected"}(d||(d={}));class u{options;connection=null;state=d.Idle;shouldReconnect=!0;stream=null;audioContext=null;workletNode=null;reconnectAttempts=0;reconnectDelay=1e3;reconnectTimeoutId=null;digitalHumanSpeaking=!1;isUserCurrentlySpeaking=!1;isUiShowingSpeaking=!1;eagerPromptSentForTurn=!1;safetyNetTimeoutId=null;audioChunksSent=0;constructor(e){this.options=e,this.options.model=this.options.model||"flux-general-en",this.options.language=this.options.language||"en",this.options.eotThreshold=this.options.eotThreshold??.7,this.options.useEagerEndOfTurn=this.options.useEagerEndOfTurn??!0,this.options.safetyNetTimeoutMs=this.options.safetyNetTimeoutMs??2e3,this.options.echoCancellation=this.options.echoCancellation??!0,this.options.noiseSuppression=this.options.noiseSuppression??!0,this.options.autoGainControl=this.options.autoGainControl??!0,this.handleAppMessages()}async startRecognition(){i.A.info(`${h} Starting speech recognition`),this.shouldReconnect=!0,this.resetReconnectionState(),await this.connect()}async stopRecognition(){i.A.info(`${h} Stopping speech recognition`),this.shouldReconnect=!1,this.clearReconnectTimeout(),await this.disconnect()}async pause(){return i.A.info(`${h} Pausing speech recognition`),this.state=d.Paused,this.clearSafetyNet(),this.resetSpeakingStates(),this.eagerPromptSentForTurn=!1,this.stream&&(this.stream.getTracks().forEach(e=>{e.enabled=!1}),i.A.debug(`${h} Audio tracks disabled`)),!0}async resume(){if(i.A.info(`${h} Resuming speech recognition`),this.state===d.Paused){if(this.stream)return 
this.state=d.Connected,this.stream.getTracks().forEach(e=>{e.enabled=!0}),i.A.debug(`${h} Audio tracks re-enabled`),!0;if(this.connection)return this.state=d.Connected,await this.startMicrophone(),!0;this.state=d.Disconnected}return i.A.debug(`${h} Initiating connection`),await this.connect(),!0}setChatMetadata(e){this.options.promptMetadata=e}async connect(){if(this.state!==d.Connected)if(this.state!==d.Connecting){this.state=d.Connecting;try{const e=await this.getToken();i.A.info(`${h} Connecting to Deepgram Flux v2 — api_url="${e.api_url}", sdk_version="${e.sdk_version}", token_length=${e.token?.length??0}`);const t=new c.c({accessToken:e.token,baseUrl:e.api_url}),n={model:this.options.model,encoding:"linear16",sample_rate:String(16e3),mip_opt_out:"true",...void 0!==this.options.eotThreshold&&{eot_threshold:this.options.eotThreshold},...void 0!==this.options.eagerEotThreshold&&{eager_eot_threshold:this.options.eagerEotThreshold},...void 0!==this.options.eotTimeoutMs&&{eot_timeout_ms:this.options.eotTimeoutMs},...this.options.keyterms&&this.options.keyterms.length>0&&{keyterm:this.options.keyterms}};if(this.connection=await t.listen.v2.connect(n),this.connection.connect(),await Promise.race([this.connection.waitForOpen(),new Promise((e,t)=>setTimeout(()=>t(new Error("Connection timeout")),1e4))]),this.state!==d.Paused&&(this.state=d.Connected),i.A.info(`${h} Connection opened`),this.setupEventHandlers(),this.state===d.Paused)return i.A.info(`${h} Pause requested during connection — staying paused`),void this.resetReconnectionState();await this.startMicrophone(),i.A.info(`${h} Connected successfully`),this.resetReconnectionState()}catch(e){this.state=d.Disconnected,i.A.error(`${h} Connection error`,i.A.serialiseError(e)),this.shouldReconnect&&(this.emitTransientError(e),this.scheduleReconnect())}}else i.A.warn(`${h} Connection already in progress`);else i.A.warn(`${h} Already connected`)}async 
disconnect(){if(this.state!==d.Idle&&(this.state!==d.Disconnected||this.connection)){i.A.info(`${h} Disconnecting`);try{if(this.stopMicrophone(),this.connection){try{this.connection.sendCloseStream({type:"CloseStream"})}catch{}this.connection.close(),this.connection=null}}catch(e){i.A.error(`${h} Disconnect error`,i.A.serialiseError(e))}this.clearSafetyNet(),this.resetSpeakingStates(),this.eagerPromptSentForTurn=!1,this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1))}}scheduleReconnect(){if(this.reconnectAttempts>=5)return i.A.error(`${h} Max reconnection attempts (5) reached`),void this.clientMsgSend(new s.Cj("Unable to connect to speech recognition service after 5 attempts"));this.reconnectAttempts++,i.A.info(`${h} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`),this.reconnectTimeoutId=setTimeout(()=>{this.connect()},this.reconnectDelay),this.reconnectDelay=Math.min(2*this.reconnectDelay,3e4)}resetReconnectionState(){this.reconnectAttempts=0,this.reconnectDelay=1e3,this.clearReconnectTimeout()}clearReconnectTimeout(){this.reconnectTimeoutId&&(clearTimeout(this.reconnectTimeoutId),this.reconnectTimeoutId=null)}async getToken(){const e=this.options.model||"flux-general-en",t=`${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(e)}`,n=await fetch(t,{method:"GET",headers:{Authorization:`Bearer ${this.options.jwtToken}`,"Content-Type":"application/json"}});if(!n.ok)throw new Error(`Token fetch failed: ${n.status} ${n.statusText}`);return await n.json()}async startMicrophone(){try{if(i.A.info(`${h} Starting microphone`),this.stopMicrophone(),this.stream=await navigator.mediaDevices.getUserMedia({audio:{deviceId:this.options.microphoneDeviceId?{exact:this.options.microphoneDeviceId}:void 0,echoCancellation:this.options.echoCancellation,noiseSuppression:this.options.noiseSuppression,autoGainControl:this.options.autoGainControl}}),this.state===d.Paused)return i.A.info(`${h} 
Paused during getUserMedia — keeping stream but disabling tracks`),void this.stream.getTracks().forEach(e=>{e.enabled=!1});this.audioContext=new AudioContext({sampleRate:16e3});const e=this.audioContext.createMediaStreamSource(this.stream),t=new Blob(["\nclass PcmCaptureProcessor extends AudioWorkletProcessor {\n constructor() {\n super()\n this._buffer = new Float32Array(1280)\n this._offset = 0\n }\n process(inputs, outputs, parameters) {\n const input = inputs[0]?.[0]\n if (!input) return true\n for (let i = 0; i < input.length; i++) {\n this._buffer[this._offset++] = input[i]\n if (this._offset >= this._buffer.length) {\n const int16 = new Int16Array(this._buffer.length)\n for (let j = 0; j < this._buffer.length; j++) {\n const s = Math.max(-1, Math.min(1, this._buffer[j]))\n int16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF\n }\n this.port.postMessage(int16.buffer, [int16.buffer])\n this._offset = 0\n }\n }\n return true\n }\n}\nregisterProcessor('pcm-capture-processor', PcmCaptureProcessor)\n"],{type:"application/javascript"}),n=URL.createObjectURL(t);await this.audioContext.audioWorklet.addModule(n),URL.revokeObjectURL(n),this.workletNode=new AudioWorkletNode(this.audioContext,"pcm-capture-processor"),this.audioChunksSent=0,this.workletNode.port.onmessage=e=>{this.connection&&this.state===d.Connected&&(this.connection.sendMedia(e.data),this.audioChunksSent++,this.audioChunksSent%50==1&&i.A.info(`${h} Audio chunks sent: ${this.audioChunksSent}, size: ${e.data.byteLength} bytes`))},e.connect(this.workletNode),this.workletNode.connect(this.audioContext.destination),i.A.info(`${h} Microphone started (linear16 PCM @ 16000Hz)`),this.clientMsgSend(new s.WY(!0))}catch(e){i.A.error(`${h} Microphone error`,i.A.serialiseError(e)),this.clientMsgSend(new s.co(new 
Error(JSON.stringify(e))))}}/* Tear down audio capture: close/disconnect the worklet node, close the AudioContext (close errors ignored), and stop every media track. */stopMicrophone(){this.workletNode&&(this.workletNode.port.close(),this.workletNode.disconnect(),this.workletNode=null),this.audioContext&&(this.audioContext.close().catch(()=>{}),this.audioContext=null),this.stream&&(this.stream.getTracks().forEach(e=>{e.stop()}),this.stream=null),i.A.info(`${h} Microphone stopped`)}/* Attach open/message/close/error handlers to the Deepgram WebSocket connection; routes v2 message types (TurnInfo/Connected/Error) to their handlers. */setupEventHandlers(){this.connection&&(this.connection.on("open",()=>{this.handleConnectionOpen()}),this.connection.on("message",e=>{if(null!==e&&"object"==typeof e&&"type"in e){const t=e;"TurnInfo"===t.type?this.handleTurnInfo(e):"Connected"===t.type?i.A.info(`${h} v2 connection confirmed`):"Error"===t.type?this.handleFatalError(e):i.A.debug(`${h} Unhandled v2 message type: ${t.type}`)}}),this.connection.on("close",e=>{this.handleConnectionClose(e)}),this.connection.on("error",e=>{const t={};e instanceof Event&&(t.type=e.type,t.target=e.target?.url??e.target?.readyState??"unknown"),i.A.error(`${h} WebSocket error event`,e,t),this.emitTransientError(e)}))}/* Dispatch one TurnInfo event to the matching per-event handler; never throws (errors are logged serialised) and always re-arms the safety-net timer in finally. */handleTurnInfo(e){try{switch(i.A.debug(`${h} TurnInfo event: ${e.event}, transcript="${(e.transcript||"").substring(0,50)}${(e.transcript||"").length>50?"...":""}", turn_index=${e.turn_index}, eot_confidence=${e.end_of_turn_confidence??"n/a"}`),e.event){case"StartOfTurn":this.handleStartOfTurn(e);break;case"Update":this.handleUpdate(e);break;case"EagerEndOfTurn":this.handleEagerEndOfTurn(e);break;case"TurnResumed":this.handleTurnResumed(e);break;case"EndOfTurn":this.handleEndOfTurn(e);break;default:i.A.debug(`${h} Unknown TurnInfo event: ${e.event}`)}}catch(e){i.A.error(`${h} Error processing TurnInfo`,i.A.serialiseError(e))}finally{this.resetSafetyNet()}}/* A new turn started: clear the eager-prompt flag. */handleStartOfTurn(e){i.A.debug(`${h} StartOfTurn: turn_index=${e.turn_index}`),this.eagerPromptSentForTurn=!1}/* Interim transcript: raise UI and data-channel speaking state, interrupt the avatar if it is talking, then emit a non-final transcript message. */handleUpdate(e){const t=e.transcript||"";if(""===t)return;this.isUiShowingSpeaking||(this.isUiShowingSpeaking=!0,this.clientMsgSend(new 
s._4)),this.isUserCurrentlySpeaking||(this.isUserCurrentlySpeaking=!0,this.dataChannelMsgSend(new o.A(o.f.Start))),this.digitalHumanSpeaking&&(i.A.info(`${h} User speech detected during avatar speaking — interrupting`),this.dataChannelMsgSend(new a.f),this.clientMsgSend(new s.tc),this.digitalHumanSpeaking=!1);const n={transcript:t,final:!1,confidence:this.calculateWordConfidence(e.words),language_code:this.options.language||""};this.clientMsgSend(new s.Ux(n))}/* When useEagerEndOfTurn is enabled and the avatar is silent, send the prompt early, before the real EndOfTurn arrives. */handleEagerEndOfTurn(e){const t=e.transcript||"";i.A.debug(`${h} EagerEndOfTurn: confidence=${e.end_of_turn_confidence}, transcript="${t}"`),this.options.useEagerEndOfTurn?""!==t.trim()&&(this.digitalHumanSpeaking?i.A.debug(`${h} EagerEndOfTurn ignored — avatar is speaking`):(i.A.info(`${h} EagerEndOfTurn: sending prompt early: "${t}"`),this.eagerPromptSentForTurn=!0,this.sendChatPrompt(t))):i.A.debug(`${h} EagerEndOfTurn ignored (useEagerEndOfTurn is disabled)`)}/* Turn resumed after an eager prompt was already sent: cancel it via StopSpeaking and clear the flag. */handleTurnResumed(e){i.A.debug(`${h} TurnResumed: turn_index=${e.turn_index}`),this.eagerPromptSentForTurn&&(i.A.warn(`${h} TurnResumed after eager prompt was already sent — cancelling via StopSpeaking`),this.dataChannelMsgSend(new a.f)),this.eagerPromptSentForTurn=!1}/* Final transcript: emit it, then either discard the prompt (avatar still speaking) or cancel any in-flight eager prompt and send the chat prompt; always resets speaking state. */handleEndOfTurn(e){const t=e.transcript||"";if(i.A.info(`${h} EndOfTurn: "${t}", confidence=${e.end_of_turn_confidence}`),""!==t.trim()){const n={transcript:t,final:!0,confidence:this.calculateWordConfidence(e.words),language_code:this.options.language||""};this.clientMsgSend(new s.Ux(n)),this.digitalHumanSpeaking?i.A.warn(`${h} EndOfTurn: discarding prompt — avatar is still speaking (no Update interrupted)`):(this.eagerPromptSentForTurn&&i.A.debug(`${h} EndOfTurn: cancelling in-flight eager prompt`),this.dataChannelMsgSend(new a.f),this.sendChatPrompt(t))}this.eagerPromptSentForTurn=!1,this.resetSpeakingStates()}/* Fatal Deepgram error: log it and surface an error message to the client stream. */handleFatalError(e){i.A.error(`${h} Fatal error from Deepgram: ${e.code} — ${e.description}`),this.clientMsgSend(new s.Cj(`Deepgram error: ${e.code} — 
${e.description}`))}/* Mean of the per-word confidence values; defaults to 1 when no words are present. */calculateWordConfidence(e){return e&&0!==e.length?e.reduce((e,t)=>e+t.confidence,0)/e.length:1}/* Subscribe to app messages to track avatar speaking state, session lifecycle (end/reconnect/backend error) and prompt-metadata updates. */handleAppMessages(){this.options.messages.subscribe(e=>{switch(e.uneeqMessageType){case s.Yg.AvatarStartedSpeaking:this.digitalHumanSpeaking=!0;break;case s.Yg.PromptResult:e.promptResult.success||this.handleSpeakingEnd();break;case s.Yg.AvatarAnswer:""===e.answerSpeech.replace(/<[^>]*>/g,"")&&this.handleSpeakingEnd();break;case s.Yg.AvatarStoppedSpeaking:this.handleSpeakingEnd();break;case s.Yg.SessionEnded:this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.SessionReconnecting:this.handleSpeakingEnd(),this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.CustomMetadataUpdated:this.options.promptMetadata=e.chatMetadata;break;case s.Yg.SessionBackendError:this.handleSpeakingEnd()}})}/* Mark Connected unless we are deliberately Paused. */handleConnectionOpen(){this.state!==d.Paused&&(this.state=d.Connected)}/* On close: if Paused, just clean up and wait for resume; otherwise reset state, notify the client, and reconnect when allowed. */handleConnectionClose(e){const t=e?.code??"unknown",n=e?.reason??"";if(i.A.info(`${h} Connection closed — code=${t}, reason="${n}"`),this.state===d.Paused)return i.A.info(`${h} Connection closed while paused — will reconnect on resume`),this.connection=null,this.stopMicrophone(),this.clearSafetyNet(),void this.resetSpeakingStates();this.state=d.Disconnected,this.clearSafetyNet(),this.resetSpeakingStates(),this.eagerPromptSentForTurn=!1,this.clientMsgSend(new s.WY(!1)),this.shouldReconnect&&(i.A.info(`${h} Unexpected disconnect, attempting reconnection...`),this.scheduleReconnect())}/* Surface a non-fatal error to the client as plain text. */emitTransientError(e){const t=e instanceof Error?e.message:String(e);this.clientMsgSend(new s.fP(t))}/* Send a chat prompt over the data channel, stamping the spoken locale into the prompt metadata when a language is configured. */sendChatPrompt(e){e&&""!==e.trim()&&(this.options.language&&(this.options.promptMetadata.userSpokenLocale=this.options.language),this.dataChannelMsgSend(new r.D(e,this.options.promptMetadata)))}/* The avatar is no longer speaking. */handleSpeakingEnd(){this.digitalHumanSpeaking=!1}/* Re-arm the watchdog: while speaking flags are set, force-reset them if no TurnInfo event arrives within safetyNetTimeoutMs. */resetSafetyNet(){this.clearSafetyNet(),(this.isUiShowingSpeaking||this.isUserCurrentlySpeaking)&&(this.safetyNetTimeoutId=setTimeout(()=>{i.A.warn(`${h} Safety net: no 
TurnInfo events for ${this.options.safetyNetTimeoutMs}ms while speaking — resetting`),this.resetSpeakingStates()},this.options.safetyNetTimeoutMs))}/* Cancel any pending safety-net timer. */clearSafetyNet(){this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null)}/* Clear user-speaking flags, notifying the data channel (Stop) and the UI. */resetSpeakingStates(){this.isUserCurrentlySpeaking&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new o.A(o.f.Stop))),this.isUiShowingSpeaking&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}/* Forward a message over the session data channel. */dataChannelMsgSend(e){this.options.sendMessage(e)}/* Emit a message on the client-side message stream. */clientMsgSend(e){this.options.messages.next(e)}}}}]);
2
2
  //# sourceMappingURL=363.index.js.map