uneeq-js 3.16.0 → 3.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +0,0 @@
1
/**
 * Deepgram speech-to-text bridge.
 *
 * Streams microphone audio (MediaRecorder, webm/opus) to a Deepgram live
 * transcription socket, accumulates the transcripts it returns, and forwards
 * finished utterances as chat prompts through `options.sendMessage`, while
 * publishing status messages on `options.messages`.
 *
 * NOTE(review): the imported bindings below keep their bundler-minified names
 * because their real identities are not visible from this file. Roles, as
 * inferred purely from how this file uses them (confirm against the chunks):
 *   b  - client constructed with { accessToken, baseUrl }; exposes `.listen.v1.connect`
 *   t  - logger (`info` / `warn` / `debug` / `error` / `serialiseError`)
 *   p  - client message built with a boolean: true after the microphone starts,
 *        false on disconnect / connection close
 *   h  - data-channel message built with "start" / "stop" (user speaking state)
 *   C  - data-channel message sent when the user interrupts the digital human
 *   k  - client message sent alongside C on interruption
 *   v  - client message sent when the UI should show the user as speaking
 *   f  - client message sent when the UI should stop showing speaking
 *   g  - client message wrapping a transcription result object
 *   S  - data-channel message built from (promptText, promptMetadata)
 *   T  - client message wrapping a microphone Error
 *   y  - client message sent when reconnection attempts are exhausted
 *   M  - client message wrapping a transient error string
 */
import { a as b } from "./chunk-J5C63PLV.js";
import {
  D as p,
  U as h,
  V as C,
  f as t,
  h as S,
  j as T,
  l as g,
  n as v,
  o as f,
  r as k,
  v as y,
  w as M,
} from "./chunk-OXR5BQBV.js";

/** Prefix put in front of every log line emitted by this module. */
const LOG_PREFIX = "[Deepgram STT]";
/** How long `connect()` waits for the socket to open before giving up. */
const CONNECTION_TIMEOUT_MS = 1e4;
/** Timeslice passed to `MediaRecorder.start()`. */
const RECORDER_TIMESLICE_MS = 250;
/** `audioBitsPerSecond` passed to the MediaRecorder. */
const AUDIO_BITS_PER_SECOND = 48e3;
/** First reconnect delay; also the value the delay is reset to. */
const INITIAL_RECONNECT_DELAY_MS = 1e3;
/** Upper bound for the growing reconnect delay. */
const MAX_RECONNECT_DELAY_MS = 3e4;
/** Factor the reconnect delay is multiplied by after each scheduled attempt. */
const RECONNECT_BACKOFF_FACTOR = 2;
/** Number of reconnect attempts before a fatal error message is emitted. */
const MAX_RECONNECT_ATTEMPTS = 5;
/** Default for `options.safetyNetTimeoutMs`. */
const DEFAULT_SAFETY_NET_TIMEOUT_MS = 2e3;
/** Default for `options.interruptionWordThreshold`. */
const DEFAULT_INTERRUPTION_WORD_THRESHOLD = 3;

/**
 * Counts whitespace-separated tokens exactly as the rest of this module expects.
 * Note that an empty / all-whitespace string yields 1 (`"".split(/\s+/)` is `[""]`).
 *
 * @param {string} text
 * @returns {number}
 */
function countWords(text) {
  return text.trim().split(/\s+/).length;
}

class DeepgramSTT {
  /** Caller-supplied options object; defaults are written back onto it. */
  options;
  /** Live transcription connection, or null when none is held. */
  connection = null;
  /** One of "Idle" | "Connecting" | "Connected" | "Paused" | "Disconnected". */
  state = "Idle";
  /** When false, connection failures / closes do not schedule a reconnect. */
  shouldReconnect = true;
  /** Active MediaRecorder, or null. */
  microphone = null;
  /** Active MediaStream from getUserMedia, or null. */
  stream = null;
  reconnectAttempts = 0;
  reconnectDelay = INITIAL_RECONNECT_DELAY_MS;
  reconnectTimeoutId = null;
  /** True between "AvatarStartedSpeaking" and the matching end / interruption. */
  digitalHumanSpeaking = false;
  /** True after a "start" speaking message was sent on the data channel. */
  isUserCurrentlySpeaking = false;
  /** True after the UI was told the user is speaking. */
  isUiShowingSpeaking = false;
  /** Transcript text collected so far for the utterance in progress. */
  accumulatedTranscript = "";
  /** Sum of (confidence * wordCount) over accumulated chunks. */
  accumulatedConfidenceSum = 0;
  accumulatedWordCount = 0;
  /** `Date.now()` of the most recent transcript / UtteranceEnd event. */
  lastDeepgramEventTime = 0;
  safetyNetTimeoutId = null;

  /**
   * Stores the options object, fills in defaults for unset fields (mutating the
   * object that was passed in), validates `safetyNetTimeoutMs`, and subscribes
   * to `options.messages`.
   *
   * @param {object} e Options. Fields read in this file: `model`, `language`,
   *   `smartFormat`, `interimResults`, `utteranceEndMs`, `vadEvents`,
   *   `fillerWords`, `endpointing`, `echoCancellation`, `noiseSuppression`,
   *   `autoGainControl`, `interruptionWordThreshold`, `noDelay`,
   *   `safetyNetTimeoutMs`, `keyterms`, `microphoneDeviceId`, `connectionUrl`,
   *   `jwtToken`, `promptMetadata`, `messages` (needs `subscribe` / `next`),
   *   `sendMessage`.
   */
  constructor(e) {
    this.options = e;
    const options = this.options;

    // `||` (not `??`) is deliberate here: an empty string also falls back.
    options.model = options.model || "nova-3";
    options.language = options.language || "en";
    options.smartFormat = options.smartFormat ?? true;
    options.interimResults = options.interimResults ?? true;
    options.utteranceEndMs = options.utteranceEndMs ?? 1500;
    options.vadEvents = options.vadEvents ?? true;
    options.fillerWords = options.fillerWords ?? false;
    options.endpointing = options.endpointing ?? 500;
    options.echoCancellation = options.echoCancellation ?? true;
    options.noiseSuppression = options.noiseSuppression ?? true;
    options.autoGainControl = options.autoGainControl ?? true;
    options.interruptionWordThreshold =
      options.interruptionWordThreshold ?? DEFAULT_INTERRUPTION_WORD_THRESHOLD;
    options.noDelay = options.noDelay ?? false;
    options.safetyNetTimeoutMs = options.safetyNetTimeoutMs ?? DEFAULT_SAFETY_NET_TIMEOUT_MS;

    if (options.safetyNetTimeoutMs <= 500) {
      // Too short to be usable: warn and fall back to the default.
      t.warn(
        `${LOG_PREFIX} safetyNetTimeoutMs is set to ${options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Ignoring and default the value to ${DEFAULT_SAFETY_NET_TIMEOUT_MS}ms.`,
      );
      options.safetyNetTimeoutMs = DEFAULT_SAFETY_NET_TIMEOUT_MS;
    } else if (options.safetyNetTimeoutMs <= 1e3) {
      // Accepted, but flagged as risky.
      t.warn(
        `${LOG_PREFIX} safetyNetTimeoutMs is set to ${options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is ${DEFAULT_SAFETY_NET_TIMEOUT_MS}ms.`,
      );
    } else {
      t.info(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${options.safetyNetTimeoutMs}ms.`);
    }

    this.handleAppMessages();
  }

  /** Enables reconnection, resets the backoff state and connects. */
  async startRecognition() {
    t.info(`${LOG_PREFIX} Starting speech recognition`);
    this.shouldReconnect = true;
    this.resetReconnectionState();
    await this.connect();
  }

  /** Disables reconnection, cancels any pending reconnect and disconnects. */
  async stopRecognition() {
    t.info(`${LOG_PREFIX} Stopping speech recognition`);
    this.shouldReconnect = false;
    this.clearReconnectTimeout();
    await this.disconnect();
  }

  /**
   * Moves to the "Paused" state: cancels the safety net, drops any accumulated
   * transcript, resets speaking flags and disables (but keeps) the audio tracks.
   *
   * @returns {Promise<boolean>} Always resolves to true.
   */
  async pause() {
    t.info(`${LOG_PREFIX} Pausing speech recognition`);
    this.state = "Paused";
    this.clearSafetyNetTimeout();
    this.resetAccumulatedState();
    this.resetSpeakingStates();
    if (this.stream) {
      for (const track of this.stream.getTracks()) {
        track.enabled = false;
      }
      t.debug(`${LOG_PREFIX} Audio tracks disabled`);
    }
    return true;
  }

  /**
   * Leaves the "Paused" state (or connects, if not paused).
   *
   * The cheap path - just re-enabling the tracks - is only taken when a
   * recorder is actually recording. If a pause landed while `getUserMedia` was
   * pending, `startMicrophone` keeps the stream but never creates a recorder;
   * re-enabling tracks alone would then report "Connected" without ever
   * sending audio, so the microphone is restarted instead.
   *
   * @returns {Promise<boolean>} Always resolves to true.
   */
  async resume() {
    t.info(`${LOG_PREFIX} Resuming speech recognition`);

    if (this.state === "Paused") {
      const recorderIsLive = this.microphone !== null && this.microphone.state === "recording";

      if (this.stream && recorderIsLive) {
        this.state = "Connected";
        for (const track of this.stream.getTracks()) {
          track.enabled = true;
        }
        t.debug(`${LOG_PREFIX} Audio tracks re-enabled`);
        return true;
      }

      if (this.connection) {
        // startMicrophone() releases any half-initialised stream before re-acquiring.
        this.state = "Connected";
        await this.startMicrophone();
        return true;
      }

      // Nothing usable left (e.g. the socket closed while paused): reconnect.
      this.state = "Disconnected";
    }

    t.debug(`${LOG_PREFIX} Initiating connection`);
    await this.connect();
    return true;
  }

  /**
   * Replaces the metadata object attached to outgoing chat prompts.
   *
   * @param {object} e New prompt metadata.
   */
  setChatMetadata(e) {
    this.options.promptMetadata = e;
  }

  /**
   * Fetches a token, opens the live transcription connection, wires the event
   * handlers and starts the microphone. Failures are logged and, when
   * `shouldReconnect` is set, reported as a transient error and retried via
   * `scheduleReconnect()`; this method itself does not throw.
   */
  async connect() {
    if (this.state === "Connected") {
      t.warn(`${LOG_PREFIX} Already connected`);
      return;
    }
    if (this.state === "Connecting") {
      t.warn(`${LOG_PREFIX} Connection already in progress`);
      return;
    }

    this.state = "Connecting";
    try {
      const credentials = await this.getToken();
      t.info(`${LOG_PREFIX} Connecting to Deepgram`);

      const client = new b({ accessToken: credentials.token, baseUrl: credentials.api_url });
      const listenParams = {
        model: this.options.model,
        language: this.options.language,
        smart_format: String(this.options.smartFormat),
        interim_results: String(this.options.interimResults),
        utterance_end_ms: this.options.utteranceEndMs,
        vad_events: String(this.options.vadEvents),
        filler_words: String(this.options.fillerWords),
        endpointing: this.options.endpointing,
        mip_opt_out: "true",
      };
      if (this.options.keyterms && this.options.keyterms.length > 0) {
        listenParams.keyterm = this.options.keyterms;
      }
      if (this.options.noDelay) {
        listenParams.queryParams = { no_delay: "true" };
      }

      this.connection = await client.listen.v1.connect(listenParams);
      this.connection.connect();

      // Race the open against a timeout, and always clear the timer so it does
      // not linger after the race has been decided.
      let openTimeoutId;
      const openTimeout = new Promise((_resolve, reject) => {
        openTimeoutId = setTimeout(
          () => reject(new Error("Connection timeout")),
          CONNECTION_TIMEOUT_MS,
        );
      });
      try {
        await Promise.race([this.connection.waitForOpen(), openTimeout]);
      } finally {
        clearTimeout(openTimeoutId);
      }

      // pause() may have been called while we were awaiting; do not override it.
      if (this.state !== "Paused") {
        this.state = "Connected";
      }
      t.info(`${LOG_PREFIX} Connection opened`);
      this.setupEventHandlers();

      if (this.state === "Paused") {
        t.info(`${LOG_PREFIX} Pause requested during connection \u2014 staying paused`);
        this.resetReconnectionState();
        return;
      }

      await this.startMicrophone();
      t.info(`${LOG_PREFIX} Connected successfully`);
      this.resetReconnectionState();
    } catch (error) {
      this.state = "Disconnected";
      t.error(`${LOG_PREFIX} Connection error`, t.serialiseError(error));
      // Close the failed / timed-out socket so the next attempt does not leak it.
      this.releaseConnection();
      if (this.shouldReconnect) {
        this.emitTransientError(error);
        this.scheduleReconnect();
      }
    }
  }

  /**
   * Stops the microphone, closes the connection, clears transcript / speaking
   * state and announces the disconnect. No-op when idle or already fully
   * disconnected.
   */
  async disconnect() {
    const nothingToDo =
      this.state === "Idle" || (this.state === "Disconnected" && !this.connection);
    if (nothingToDo) {
      return;
    }

    t.info(`${LOG_PREFIX} Disconnecting`);
    this.clearSafetyNetTimeout();

    try {
      this.stopMicrophone();
      if (this.connection) {
        this.connection.close();
        this.connection = null;
      }
    } catch (error) {
      t.error(`${LOG_PREFIX} Disconnect error`, t.serialiseError(error));
    }

    this.resetAccumulatedState();
    this.resetSpeakingStates();
    this.state = "Disconnected";
    this.clientMsgSend(new p(false));
  }

  /**
   * Best-effort close of the current connection; always clears the reference.
   * Used when a connection attempt failed part-way.
   */
  releaseConnection() {
    const connection = this.connection;
    this.connection = null;
    if (!connection) {
      return;
    }
    try {
      connection.close();
    } catch (closeError) {
      t.debug(`${LOG_PREFIX} Error closing failed connection`, t.serialiseError(closeError));
    }
  }

  /**
   * Schedules the next `connect()` after `reconnectDelay`, then grows the delay
   * (capped). Emits a fatal error message instead once the attempt limit is hit.
   */
  scheduleReconnect() {
    if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
      t.error(`${LOG_PREFIX} Max reconnection attempts (${MAX_RECONNECT_ATTEMPTS}) reached`);
      this.clientMsgSend(
        new y(
          `Unable to connect to speech recognition service after ${MAX_RECONNECT_ATTEMPTS} attempts`,
        ),
      );
      return;
    }

    this.reconnectAttempts++;
    t.info(
      `${LOG_PREFIX} Scheduling reconnection attempt ${this.reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS} in ${this.reconnectDelay}ms`,
    );

    // Never leave an earlier timer running: two timers would mean two connects.
    this.clearReconnectTimeout();
    this.reconnectTimeoutId = setTimeout(() => {
      // connect() handles its own errors, so the promise is intentionally not awaited.
      this.connect();
    }, this.reconnectDelay);

    this.reconnectDelay = Math.min(
      this.reconnectDelay * RECONNECT_BACKOFF_FACTOR,
      MAX_RECONNECT_DELAY_MS,
    );
  }

  /** Resets attempt counter and delay, and cancels any pending reconnect. */
  resetReconnectionState() {
    this.reconnectAttempts = 0;
    this.reconnectDelay = INITIAL_RECONNECT_DELAY_MS;
    this.clearReconnectTimeout();
  }

  /** Cancels the pending reconnect timer, if any. */
  clearReconnectTimeout() {
    if (this.reconnectTimeoutId) {
      clearTimeout(this.reconnectTimeoutId);
      this.reconnectTimeoutId = null;
    }
  }

  /**
   * Requests a token from `${connectionUrl}/speech-recognition-service/deepgram/token`.
   *
   * @returns {Promise<object>} Parsed JSON body; `connect()` reads `token` and
   *   `api_url` from it.
   * @throws {Error} When the response status is not ok.
   */
  async getToken() {
    const model = this.options.model || "nova-3";
    const url = `${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(model)}`;
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Authorization: `Bearer ${this.options.jwtToken}`,
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      throw new Error(`Token fetch failed: ${response.status} ${response.statusText}`);
    }
    return await response.json();
  }

  /**
   * Acquires the microphone and starts streaming recorder chunks to the
   * connection. Errors are logged and published as a microphone error message;
   * this method does not throw.
   */
  async startMicrophone() {
    try {
      t.info(`${LOG_PREFIX} Starting microphone`);
      this.stopMicrophone();

      if (!navigator.mediaDevices?.getUserMedia) {
        throw new Error("Microphone access is not available in this context");
      }

      const deviceId = this.options.microphoneDeviceId
        ? { exact: this.options.microphoneDeviceId }
        : undefined;
      this.stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          deviceId,
          echoCancellation: this.options.echoCancellation,
          noiseSuppression: this.options.noiseSuppression,
          autoGainControl: this.options.autoGainControl,
        },
      });

      if (this.state === "Paused") {
        // No recorder is created here; resume() restarts the microphone.
        t.info(
          `${LOG_PREFIX} Paused during getUserMedia \u2014 keeping stream but disabling tracks`,
        );
        for (const track of this.stream.getTracks()) {
          track.enabled = false;
        }
        return;
      }

      this.microphone = new MediaRecorder(this.stream, {
        mimeType: "audio/webm;codecs=opus",
        audioBitsPerSecond: AUDIO_BITS_PER_SECOND,
      });
      this.microphone.ondataavailable = (event) => {
        // Chunks are dropped unless we are connected (e.g. while paused).
        if (event.data.size > 0 && this.connection && this.state === "Connected") {
          event.data
            .arrayBuffer()
            .then((buffer) => {
              this.connection?.sendMedia(buffer);
            })
            .catch((conversionError) => {
              t.error(
                `${LOG_PREFIX} Error converting audio data`,
                t.serialiseError(conversionError),
              );
            });
        }
      };
      this.microphone.start(RECORDER_TIMESLICE_MS);
      t.debug(`${LOG_PREFIX} Microphone started`);
      this.clientMsgSend(new p(true));
    } catch (error) {
      t.error(`${LOG_PREFIX} Microphone error`, t.serialiseError(error));
      // JSON.stringify() of an Error / DOMException is "{}", which would hide
      // the failure reason - forward real errors unchanged.
      const micError = error instanceof Error ? error : new Error(JSON.stringify(error));
      this.clientMsgSend(new T(micError));
    }
  }

  /**
   * Stops the recorder and every track of the stream, and drops both
   * references regardless of the recorder's state.
   */
  stopMicrophone() {
    if (this.microphone) {
      // stop() is only valid while the recorder is not already inactive.
      if (this.microphone.state !== "inactive") {
        this.microphone.stop();
      }
      this.microphone = null;
    }
    if (this.stream) {
      for (const track of this.stream.getTracks()) {
        track.stop();
      }
      this.stream = null;
    }
    t.debug(`${LOG_PREFIX} Microphone stopped`);
  }

  /** Registers open / message / close / error handlers on the connection. */
  setupEventHandlers() {
    if (!this.connection) {
      return;
    }

    this.connection.on("open", () => {
      this.handleConnectionOpen();
    });

    this.connection.on("message", (message) => {
      const isTyped = message !== null && typeof message === "object" && "type" in message;
      if (!isTyped) {
        return;
      }
      if (message.type === "Results") {
        this.handleTranscript(message);
      } else if (message.type === "UtteranceEnd") {
        this.handleUtteranceEnd(message);
      } else if (message.type === "Metadata") {
        t.debug(`${LOG_PREFIX} Metadata`, message);
      }
    });

    this.connection.on("close", () => {
      this.handleConnectionClose();
    });

    this.connection.on("error", (error) => {
      t.error(`${LOG_PREFIX} WebSocket error event`, t.serialiseError(error));
      this.emitTransientError(error);
    });
  }

  /**
   * Subscribes to `options.messages` and reacts to session / avatar events,
   * dispatching on each message's `uneeqMessageType`.
   */
  handleAppMessages() {
    this.options.messages.subscribe((message) => {
      switch (message.uneeqMessageType) {
        case "AvatarStartedSpeaking":
          this.digitalHumanSpeaking = true;
          break;
        case "PromptResult":
          if (!message.promptResult.success) {
            this.handleSpeakingEnd();
          }
          break;
        case "AvatarAnswer":
          // An answer that is empty once tags are stripped produces no speech.
          if (message.answerSpeech.replace(/<[^>]*>/g, "") === "") {
            this.handleSpeakingEnd();
          }
          break;
        case "AvatarStoppedSpeaking":
          this.handleSpeakingEnd();
          break;
        case "SessionEnded":
          this.shouldReconnect = false;
          this.stopRecognition();
          break;
        case "SessionReconnecting":
          this.handleSpeakingEnd();
          this.pause();
          break;
        case "CustomMetadataUpdated":
          this.options.promptMetadata = message.chatMetadata;
          break;
        case "SessionBackendError":
          this.handleSpeakingEnd();
          break;
        default:
      }
    });
  }

  /**
   * Handles a "Results" message: accumulates the chunk, updates speaking /
   * interruption state, publishes the transcription result and re-arms the
   * safety net. Any error is logged rather than propagated.
   *
   * @param {object} e Message; reads `channel.alternatives[0].{transcript,confidence}`,
   *   `is_final` and `speech_final`.
   */
  handleTranscript(e) {
    try {
      this.lastDeepgramEventTime = Date.now();

      const channel = e.channel;
      if (!channel?.alternatives || channel.alternatives.length === 0) {
        return;
      }

      const best = channel.alternatives[0];
      const transcript = String(best.transcript || "");
      const isFinal = e.is_final;
      const speechFinal = e.speech_final;

      const transcriptPreview = `${transcript.substring(0, 50)}${transcript.length > 50 ? "..." : ""}`;
      const accumulatedPreview = `${this.accumulatedTranscript.substring(0, 30)}${this.accumulatedTranscript.length > 30 ? "..." : ""}`;
      t.debug(
        `${LOG_PREFIX} Transcript event: is_final=${isFinal}, speech_final=${speechFinal}, transcript="${transcriptPreview}", accumulated="${accumulatedPreview}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`,
      );

      if (transcript === "" && speechFinal) {
        // End of speech with nothing new: flush what we have if it looks complete.
        if (this.endsWithPunctuation(this.accumulatedTranscript)) {
          this.sendAccumulatedTranscript("Final transcript (from accumulated)");
          this.resetSpeakingStates();
        } else {
          t.debug(
            `${LOG_PREFIX} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`,
          );
        }
        return;
      }
      if (transcript === "") {
        return;
      }

      this.processTranscriptChunk(transcript, best.confidence, isFinal, speechFinal);
      this.updateSpeakingAndInterruptionState(transcript, speechFinal);
      this.emitTranscriptionResult(transcript, best.confidence, speechFinal);
      this.resetSafetyNetTimeout();
    } catch (error) {
      t.error(`${LOG_PREFIX} Error processing transcript`, t.serialiseError(error));
    }
  }

  /**
   * Appends a chunk to the accumulated transcript when it is final but the
   * speech has not ended (`is_final && !speech_final`).
   *
   * @param {string} e Transcript chunk.
   * @param {number|undefined} i Chunk confidence (treated as 1 when nullish).
   * @param {boolean} a `is_final` flag.
   * @param {boolean} s `speech_final` flag.
   */
  processTranscriptChunk(e, i, a, s) {
    if (!a || s) {
      return;
    }
    if (this.accumulatedTranscript !== "") {
      this.accumulatedTranscript += " ";
    }
    this.accumulatedTranscript += e;

    const words = countWords(e);
    this.accumulatedConfidenceSum += (i ?? 1) * words;
    this.accumulatedWordCount += words;
    t.debug(`${LOG_PREFIX} Accumulated transcript: "${this.accumulatedTranscript}"`);
  }

  /**
   * Updates the UI / data-channel speaking flags for a new chunk and interrupts
   * the digital human once enough words have been spoken over it.
   *
   * @param {string} e Transcript chunk.
   * @param {boolean} i `speech_final` flag.
   */
  updateSpeakingAndInterruptionState(e, i) {
    const spokenText = this.accumulatedTranscript !== "" ? this.accumulatedTranscript : e;
    const wordCount = countWords(spokenText);
    const threshold =
      this.options.interruptionWordThreshold ?? DEFAULT_INTERRUPTION_WORD_THRESHOLD;

    if (!this.isUiShowingSpeaking) {
      this.isUiShowingSpeaking = true;
      this.clientMsgSend(new v());
    }

    // While the digital human talks, short utterances do not count as speaking.
    const countsAsSpeaking = !this.digitalHumanSpeaking || wordCount >= threshold;
    const combined =
      this.accumulatedTranscript !== "" ? this.accumulatedTranscript + " " + e : e;
    const utteranceComplete = i && this.endsWithPunctuation(combined);

    if (!this.isUserCurrentlySpeaking && countsAsSpeaking) {
      this.isUserCurrentlySpeaking = true;
      this.dataChannelMsgSend(new h("start"));
    }

    if (this.digitalHumanSpeaking && wordCount >= threshold) {
      t.debug(`${LOG_PREFIX} Interrupting digital human`);
      this.dataChannelMsgSend(new C());
      this.clientMsgSend(new k());
      this.digitalHumanSpeaking = false;
    }

    if (this.isUserCurrentlySpeaking && utteranceComplete) {
      this.isUserCurrentlySpeaking = false;
      this.dataChannelMsgSend(new h("stop"));
    }

    if (this.isUiShowingSpeaking && utteranceComplete) {
      this.isUiShowingSpeaking = false;
      this.clientMsgSend(new f());
    }
  }

  /**
   * Publishes a transcription result for the chunk. A `speech_final` chunk that
   * ends in punctuation completes the utterance (prompt sent unless discarded
   * for being too short while the digital human speaks); a `speech_final` chunk
   * without punctuation is accumulated for the safety net.
   *
   * @param {string} e Transcript chunk.
   * @param {number|undefined} i Chunk confidence (treated as 1 when nullish).
   * @param {boolean} a `speech_final` flag.
   */
  emitTranscriptionResult(e, i, a) {
    const combined =
      this.accumulatedTranscript !== "" ? this.accumulatedTranscript + " " + e : e;
    const utteranceComplete = a && this.endsWithPunctuation(combined);

    this.clientMsgSend(
      new g({
        transcript: a ? combined : e,
        final: utteranceComplete,
        confidence: i ?? 1,
        language_code: this.options.language || "",
      }),
    );

    if (utteranceComplete) {
      const wordCount = countWords(combined);
      const threshold =
        this.options.interruptionWordThreshold ?? DEFAULT_INTERRUPTION_WORD_THRESHOLD;
      if (this.digitalHumanSpeaking && wordCount < threshold) {
        t.debug(
          `${LOG_PREFIX} Discarding utterance during speaking (${wordCount} words < ${threshold} threshold): "${combined}"`,
        );
      } else {
        t.info(`${LOG_PREFIX} Final transcript: "${combined}"`);
        this.sendChatPrompt(combined);
      }
      this.resetAccumulatedState();
      return;
    }

    if (a) {
      if (this.accumulatedTranscript !== "") {
        this.accumulatedTranscript += " ";
      }
      this.accumulatedTranscript += e;

      const words = countWords(e);
      this.accumulatedConfidenceSum += (i ?? 1) * words;
      this.accumulatedWordCount += words;
      t.debug(
        `${LOG_PREFIX} speech_final without punctuation, accumulated for safety net: "${this.accumulatedTranscript}"`,
      );
    }
  }

  /**
   * Handles an "UtteranceEnd" message: flushes a punctuated accumulated
   * transcript, keeps waiting on an unpunctuated one, and resets the speaking
   * state when nothing was accumulated.
   *
   * @param {object} e Message; only `last_word_end` is read (for logging).
   */
  handleUtteranceEnd(e) {
    this.lastDeepgramEventTime = Date.now();
    t.debug(
      `${LOG_PREFIX} UtteranceEnd event received: last_word_end=${e?.last_word_end}, accumulated="${this.accumulatedTranscript.substring(0, 50)}...", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`,
    );

    if (this.accumulatedTranscript.trim() === "") {
      this.clearSafetyNetTimeout();
      this.resetSpeakingStates();
      t.debug(`${LOG_PREFIX} UtteranceEnd: no transcript, reset speaking states`);
      return;
    }

    if (!this.endsWithPunctuation(this.accumulatedTranscript)) {
      t.debug(
        `${LOG_PREFIX} UtteranceEnd: no punctuation, waiting for more speech or safety net`,
      );
      return;
    }

    this.clearSafetyNetTimeout();
    t.debug(
      `${LOG_PREFIX} UtteranceEnd fallback triggered with transcript: "${this.accumulatedTranscript}"`,
    );
    this.sendAccumulatedTranscript("UtteranceEnd fallback");
    this.resetSpeakingStates();
    t.debug(`${LOG_PREFIX} UtteranceEnd: reset speaking states`);
  }

  /** Cancels the pending safety-net timer, if any. */
  clearSafetyNetTimeout() {
    if (this.safetyNetTimeoutId) {
      clearTimeout(this.safetyNetTimeoutId);
      this.safetyNetTimeoutId = null;
    }
  }

  /**
   * Re-arms the safety net: if the user is (shown as) speaking and there is
   * accumulated text, `triggerSafetyNet()` fires after `safetyNetTimeoutMs`
   * unless another event re-arms or cancels it first.
   */
  resetSafetyNetTimeout() {
    this.clearSafetyNetTimeout();

    const speaking = this.isUiShowingSpeaking || this.isUserCurrentlySpeaking;
    if (speaking && this.accumulatedTranscript.trim() !== "") {
      this.safetyNetTimeoutId = setTimeout(() => {
        this.triggerSafetyNet();
      }, this.options.safetyNetTimeoutMs);
    }
  }

  /** Safety-net timer callback: flushes whatever was accumulated. */
  triggerSafetyNet() {
    t.warn(
      `${LOG_PREFIX} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`,
    );
    t.debug(
      `${LOG_PREFIX} Safety net triggered: accumulated="${this.accumulatedTranscript}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, timeSinceLastEvent=${Date.now() - this.lastDeepgramEventTime}ms`,
    );
    this.sendAccumulatedTranscript("Safety net");
    this.resetSpeakingStates();
    this.safetyNetTimeoutId = null;
  }

  /**
   * Publishes the accumulated transcript as a final result and sends it as a
   * chat prompt (unless it is too short while the digital human is speaking),
   * then clears the accumulated state. Sources other than "Safety net" are
   * ignored when the text does not end in punctuation.
   *
   * @param {string} e Label of the trigger; used in logs and for the
   *   "Safety net" exemption.
   */
  sendAccumulatedTranscript(e) {
    if (this.accumulatedTranscript.trim() === "") {
      return;
    }
    if (e !== "Safety net" && !this.endsWithPunctuation(this.accumulatedTranscript)) {
      t.debug(
        `${LOG_PREFIX} sendAccumulatedTranscript: not sending as source is ${e} and transcript doesn't end in punctuation: "${this.accumulatedTranscript}"`,
      );
      return;
    }

    const threshold =
      this.options.interruptionWordThreshold ?? DEFAULT_INTERRUPTION_WORD_THRESHOLD;
    // Word-weighted mean confidence; 1 when no words were counted.
    const confidence =
      this.accumulatedWordCount > 0
        ? this.accumulatedConfidenceSum / this.accumulatedWordCount
        : 1;

    this.clientMsgSend(
      new g({
        transcript: this.accumulatedTranscript,
        final: true,
        confidence,
        language_code: this.options.language || "",
      }),
    );

    const wordCount = countWords(this.accumulatedTranscript);
    if (this.digitalHumanSpeaking && wordCount < threshold) {
      t.debug(
        `${LOG_PREFIX} Discarding utterance during speaking (${wordCount} words < ${threshold} threshold): "${this.accumulatedTranscript}"`,
      );
    } else {
      t.info(`${LOG_PREFIX} ${e}: "${this.accumulatedTranscript}"`);
      this.sendChatPrompt(this.accumulatedTranscript);
    }
    this.resetAccumulatedState();
  }

  /**
   * @param {string} e Text to inspect.
   * @returns {boolean} True when the trimmed text ends in one of `. ! ? ; :`.
   */
  endsWithPunctuation(e) {
    const trimmed = e.trim();
    if (trimmed.length === 0) {
      return false;
    }
    return /[.!?;:]$/.test(trimmed);
  }

  /** Clears the accumulated transcript and its confidence bookkeeping. */
  resetAccumulatedState() {
    this.accumulatedTranscript = "";
    this.accumulatedConfidenceSum = 0;
    this.accumulatedWordCount = 0;
  }

  /** Clears both speaking flags, notifying the data channel / UI for each one that was set. */
  resetSpeakingStates() {
    if (this.isUserCurrentlySpeaking) {
      this.isUserCurrentlySpeaking = false;
      this.dataChannelMsgSend(new h("stop"));
    }
    if (this.isUiShowingSpeaking) {
      this.isUiShowingSpeaking = false;
      this.clientMsgSend(new f());
    }
  }

  /** "open" event handler: marks the state connected unless paused. */
  handleConnectionOpen() {
    if (this.state !== "Paused") {
      this.state = "Connected";
    }
  }

  /**
   * "close" event handler. While paused the connection and microphone are
   * released and reconnection is left to `resume()`; otherwise the disconnect
   * is announced and, if allowed, a reconnect is scheduled.
   */
  handleConnectionClose() {
    t.info(`${LOG_PREFIX} Connection closed`);

    if (this.state === "Paused") {
      t.info(`${LOG_PREFIX} Connection closed while paused \u2014 will reconnect on resume`);
      this.connection = null;
      this.stopMicrophone();
      return;
    }

    this.state = "Disconnected";
    this.clientMsgSend(new p(false));
    if (this.shouldReconnect) {
      t.info(`${LOG_PREFIX} Unexpected disconnect, attempting reconnection...`);
      this.scheduleReconnect();
    }
  }

  /**
   * Sends a non-blank prompt over the data channel, stamping the configured
   * language onto the prompt metadata as `userSpokenLocale`.
   *
   * @param {string} e Prompt text.
   */
  sendChatPrompt(e) {
    if (!e || e.trim() === "") {
      return;
    }
    if (this.options.language) {
      // Metadata may never have been supplied; without this guard the write
      // below throws and the caller's state reset is skipped.
      if (this.options.promptMetadata == null) {
        this.options.promptMetadata = {};
      }
      this.options.promptMetadata.userSpokenLocale = this.options.language;
    }
    this.dataChannelMsgSend(new S(e, this.options.promptMetadata));
  }

  /** Marks the digital human as no longer speaking. */
  handleSpeakingEnd() {
    this.digitalHumanSpeaking = false;
  }

  /**
   * Publishes a transient error message carrying the error's text.
   *
   * @param {unknown} e Error or arbitrary value.
   */
  emitTransientError(e) {
    const text = e instanceof Error ? e.message : String(e);
    this.clientMsgSend(new M(text));
  }

  /** Sends a message through `options.sendMessage`. */
  dataChannelMsgSend(e) {
    this.options.sendMessage(e);
  }

  /** Publishes a message on `options.messages`. */
  clientMsgSend(e) {
    this.options.messages.next(e);
  }
}

export { DeepgramSTT };