uneeq-js 3.17.1 → 3.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -59
- package/dist/3.index.js +1 -1
- package/dist/363.index.js +1 -1
- package/dist/622.index.js +2 -0
- package/dist/622.index.js.map +1 -0
- package/dist/esm/chunks/chunk-5UQJPIVZ.js +2 -0
- package/dist/esm/chunks/{deepgram-flux-stt-5T2LMSNA.js → deepgram-flux-stt-Q6AWAYTS.js} +1 -1
- package/dist/esm/chunks/{deepgram-stt-32KGKTHM.js → deepgram-stt-YCQ6YLEN.js} +1 -1
- package/dist/esm/index.js +13 -13
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +13 -3
- package/dist/260.index.js +0 -2
- package/dist/260.index.js.map +0 -1
- package/dist/esm/chunks/chunk-WXUMSHF3.js +0 -2
package/README.md
CHANGED
|
@@ -1,97 +1,136 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://res.cloudinary.com/ddownn0ib/image/upload/v1781838531/Uneeq/uneeq_logo_yyc49n.png" alt="UneeQ" width="220">
|
|
3
|
+
</p>
|
|
3
4
|
|
|
4
|
-
|
|
5
|
-
#### Install dependencies:
|
|
6
|
-
`npm i`
|
|
5
|
+
# uneeq-js — UneeQ Digital Humans frontend SDK
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
`npm start`
|
|
7
|
+
Frontend SDK for integrating with the **UneeQ Digital Human Platform**. It manages the full lifecycle of a digital human session in the browser — WebRTC video streaming, microphone capture, speech recognition, and the data channel to the renderer.
|
|
10
8
|
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
<p align="center">
|
|
10
|
+
<img src="https://res.cloudinary.com/ddownn0ib/image/upload/v1781838526/Uneeq/uneeq_dh_laptop_promo_zncbsc.avif" alt="UneeQ digital human on the immersive platform" width="640">
|
|
11
|
+
</p>
|
|
13
12
|
|
|
14
|
-
|
|
13
|
+
- **Platform documentation:** https://docs.uneeq.io/#/
|
|
14
|
+
- **About UneeQ Digital Humans:** https://www.digitalhumans.com
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
## Install
|
|
17
17
|
|
|
18
|
-
|
|
18
|
+
```bash
|
|
19
|
+
npm install uneeq-js
|
|
20
|
+
```
|
|
19
21
|
|
|
20
|
-
|
|
22
|
+
## Quick start
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
# From uneeq-js directory:
|
|
24
|
-
npm link
|
|
25
|
-
npm start # Runs in watch mode, auto-compiles changes
|
|
24
|
+
A UneeQ session is created server-side first. Your backend calls the UneeQ session API to obtain a `sessionId` and `sessionToken`, then passes them to the SDK in the browser:
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
npm start
|
|
30
|
-
```
|
|
26
|
+
```typescript
|
|
27
|
+
import { Uneeq, CameraAnchorDistance, CameraAnchorHorizontal, type UneeqConfig } from 'uneeq-js'
|
|
31
28
|
|
|
32
|
-
|
|
29
|
+
const config: UneeqConfig = {
|
|
30
|
+
// Required — obtained from the UneeQ session API
|
|
31
|
+
sessionId: 'session-id-from-server',
|
|
32
|
+
sessionToken: 'jwt-from-server',
|
|
33
|
+
connectionUrl: 'wss://your-uneeq-host',
|
|
34
|
+
personaId: 'persona-uuid',
|
|
33
35
|
|
|
34
|
-
|
|
36
|
+
// Required — initial camera framing
|
|
37
|
+
cameraAnchorDistance: CameraAnchorDistance.MediumShot,
|
|
38
|
+
cameraAnchorHorizontal: CameraAnchorHorizontal.Center,
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
+
// Where the digital human video renders
|
|
41
|
+
videoContainerElement: document.getElementById('video-container') as HTMLDivElement,
|
|
42
|
+
|
|
43
|
+
// Optional
|
|
44
|
+
enableMicrophone: true,
|
|
45
|
+
logLevel: 'info',
|
|
46
|
+
messageHandler: (msg) => console.log(msg.uneeqMessageType),
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const uneeq = new Uneeq(config)
|
|
50
|
+
uneeq.init()
|
|
40
51
|
```
|
|
41
52
|
|
|
42
|
-
|
|
53
|
+
## Core API
|
|
54
|
+
|
|
55
|
+
```typescript
|
|
56
|
+
// Session
|
|
57
|
+
uneeq.init() // Start the session
|
|
58
|
+
uneeq.endSession() // End it gracefully
|
|
59
|
+
|
|
60
|
+
// Conversation
|
|
61
|
+
uneeq.chatPrompt('Hello!') // Send text to the NLP backend
|
|
62
|
+
uneeq.speak('Say this exactly') // Make the digital human speak text directly
|
|
63
|
+
uneeq.stopSpeaking() // Interrupt current speech
|
|
43
64
|
|
|
44
|
-
|
|
65
|
+
// Microphone & speech recognition
|
|
66
|
+
uneeq.enableMicrophone(true)
|
|
67
|
+
uneeq.pauseSpeechRecognition()
|
|
68
|
+
uneeq.resumeSpeechRecognition()
|
|
45
69
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
70
|
+
// Digital human audio & camera
|
|
71
|
+
uneeq.muteDigitalHuman()
|
|
72
|
+
uneeq.unmuteDigitalHuman()
|
|
73
|
+
uneeq.cameraAnchorDistance(position) // CameraAnchorDistance: CloseUp, MediumShot, FullShot, …
|
|
74
|
+
uneeq.cameraAnchorHorizontal(position) // CameraAnchorHorizontal: Left, Center, Right
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Events
|
|
78
|
+
|
|
79
|
+
All session events are delivered through the `messageHandler` callback:
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
import { UneeqMessageType } from 'uneeq-js'
|
|
83
|
+
|
|
84
|
+
messageHandler: (msg) => {
|
|
85
|
+
switch (msg.uneeqMessageType) {
|
|
86
|
+
case UneeqMessageType.SessionLive: break // Ready to interact
|
|
87
|
+
case UneeqMessageType.AvatarStartedSpeaking: break
|
|
88
|
+
case UneeqMessageType.AvatarStoppedSpeaking: break
|
|
89
|
+
case UneeqMessageType.UserStartedSpeaking: break
|
|
90
|
+
case UneeqMessageType.SpeechTranscription: break
|
|
91
|
+
case UneeqMessageType.SessionEnded: break
|
|
92
|
+
case UneeqMessageType.SessionError: break
|
|
93
|
+
// ...see UneeqMessageType for the full set
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
```
|
|
49
97
|
|
|
50
|
-
|
|
51
|
-
A custom logger has been implemented using the `pino` library. You can use it to log strings with values.
|
|
52
|
-
`Logger.info('Logging just a string')`
|
|
53
|
-
`Logger.info('Logging with value', { some: 'thing' })`
|
|
54
|
-
`Logger.info('Logging with multiple values', { some: 'thing' }, { another: 'thing' })`
|
|
98
|
+
Wait for `SessionLive` before calling conversation APIs.
|
|
55
99
|
|
|
56
|
-
|
|
57
|
-
`Logger.debug('a debug log')`
|
|
58
|
-
`Logger.info('an info log')`
|
|
59
|
-
`Logger.warn('a warning log')`
|
|
60
|
-
`Logger.error('an error log')`
|
|
100
|
+
## Configuration
|
|
61
101
|
|
|
62
|
-
|
|
102
|
+
`UneeqConfig` covers session credentials, microphone behaviour, speech-recognition provider (`google` or `deepgram`), VAD/interruption, and diagnostics. The full set of options is documented in the bundled TypeScript types (`UneeqConfig`) — your editor's autocomplete is the source of truth.
|
|
63
103
|
|
|
64
|
-
|
|
104
|
+
### Logging
|
|
65
105
|
|
|
106
|
+
Logs are prefixed with `UneeQ: ` so they're easy to filter. Set verbosity with `logLevel` (`'debug' | 'info' | 'warn' | 'error'`, default `'info'`).
|
|
66
107
|
|
|
67
108
|
## Beta features
|
|
68
109
|
|
|
69
|
-
### `audioUpstreamMode`
|
|
110
|
+
### `audioUpstreamMode`
|
|
70
111
|
|
|
71
|
-
`audioUpstreamMode` on `UneeqConfig` controls where user microphone audio is delivered. **Omit the field or leave it `undefined` for the existing GA behaviour — no existing caller needs to change anything.**
|
|
112
|
+
`audioUpstreamMode` on `UneeqConfig` controls where the user's microphone audio is delivered. **Omit the field (or leave it `undefined`) for the existing GA behaviour — no existing caller needs to change anything.**
|
|
72
113
|
|
|
73
114
|
| Value | Status | What it does |
|
|
74
115
|
| --- | --- | --- |
|
|
75
|
-
| `'speech-recognition-service'` | **GA / default** | Mic is captured with browser AEC/AGC/NS
|
|
76
|
-
| `'pixel-streaming'` | **Beta** | Mic is attached to the Pixel Streaming WebRTC peer connection as an upstream audio track
|
|
77
|
-
| `'both'` | **Beta** | Mic is delivered to BOTH the
|
|
116
|
+
| `'speech-recognition-service'` | **GA / default** | Mic is captured with browser AEC/AGC/NS and sent to UneeQ's STT WebSocket (Google or Deepgram). Transcripts flow to the renderer over the data channel. This is the behaviour when the field is omitted. |
|
|
117
|
+
| `'pixel-streaming'` | **Beta** | Mic is attached to the Pixel Streaming WebRTC peer connection as an upstream audio track. No STT client is instantiated; `speechRecognitionProvider` is ignored. Use when the renderer drives conversation in-renderer. |
|
|
118
|
+
| `'both'` | **Beta** | Mic is delivered to BOTH the peer connection AND the STT WebSocket. Two independent `getUserMedia` captures run in parallel. Use when the renderer needs raw audio AND you still want server-side transcripts (captions, compliance, transcript-driven LLM). |
|
|
78
119
|
|
|
79
|
-
**Headphone caveat:** the
|
|
120
|
+
**Headphone caveat:** the Pixel Streaming library captures the mic track without AEC/AGC/NS, so expect echo/feedback unless the user is wearing headphones. Not a concern in the default `'speech-recognition-service'` mode.
|
|
80
121
|
|
|
81
|
-
**Shape and defaults may change while the two non-default modes are beta.** The
|
|
122
|
+
**Shape and defaults may change while the two non-default modes are beta.** The SDK emits an `info`-level log line at session init whenever a beta mode is active, so the current upstream routing is visible in support captures:
|
|
82
123
|
|
|
83
124
|
```
|
|
84
125
|
audioUpstreamMode='pixel-streaming' is beta; behaviour may change.
|
|
85
126
|
```
|
|
86
127
|
|
|
87
|
-
|
|
128
|
+
Other related features (review the docs for more in-depth guidance):
|
|
88
129
|
|
|
89
|
-
- `muteUpstreamMic()` / `unmuteUpstreamMic()` — mute/unmute the
|
|
90
|
-
- `pauseSpeechRecognition()` / `resumeSpeechRecognition()` — unchanged
|
|
91
|
-
- `enableMicrophone(enabled)` —
|
|
130
|
+
- `muteUpstreamMic()` / `unmuteUpstreamMic()` — mute/unmute the Pixel Streaming upstream mic track only. No-op with a warning in `'speech-recognition-service'` mode (there is no PS track to control). In `'pixel-streaming'` / `'both'` mode, prefer these over `enableMicrophone()` when you only want to gate the PS leg.
|
|
131
|
+
- `pauseSpeechRecognition()` / `resumeSpeechRecognition()` — unchanged, but now warn and return `false` in `'pixel-streaming'` mode (there is no STT leg to pause).
|
|
132
|
+
- `enableMicrophone(enabled)` — dispatches to whichever leg(s) are active for the current mode: STT in `'speech-recognition-service'`, PS in `'pixel-streaming'`, both in `'both'`.
|
|
92
133
|
|
|
93
|
-
|
|
134
|
+
## License
|
|
94
135
|
|
|
95
|
-
|
|
96
|
-
#### onnxruntime-web
|
|
97
|
-
onnxruntime-web only works on version 1.15.1 at this time. Upgrading this package will likely break VAD functionality.
|
|
136
|
+
ISC © UneeQ
|
package/dist/3.index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";
/*
 * Webpack chunk 3: registers module 3, which exports `DeepgramSTT`.
 *
 * `DeepgramSTT` captures microphone audio with MediaRecorder, streams it over a
 * Deepgram "listen" connection, accumulates the transcript chunks it receives
 * and forwards finished utterances as chat prompts through `options.sendMessage`.
 * Status and transcription messages are published through `options.messages`.
 *
 * The identifiers reached through the webpack require function (`.A`, `.Yg`,
 * `.WY`, ...) are minified export names of sibling modules; their meaning is
 * only described here as far as their usage in this file shows it.
 */
(Object("undefined" != typeof self ? self : this).webpackChunkUneeq =
  Object("undefined" != typeof self ? self : this).webpackChunkUneeq || []).push([[3], {
  3(_module, exportsObj, requireFn) {
    requireFn.d(exportsObj, { DeepgramSTT: () => DeepgramSTT });

    // Sibling modules. Exports are read lazily (`mod.X` at the call site), as in
    // the original bundle, so webpack's live bindings keep working.
    var logging = requireFn(514); // `.A` exposes warn/info/debug/error/serialiseError
    var msgs = requireFn(838); // `.Yg` holds the message-type keys; the rest are message classes
    var speaking = requireFn(33); // `.A` is built with `.f.Start` / `.f.Stop`
    var prompt = requireFn(388); // `.D` is built with (text, promptMetadata)
    var interrupt = requireFn(58); // `.f` is sent when the digital human is interrupted
    var deepgram = requireFn(260); // `.c` is the client exposing `listen.v1.connect`

    const LOG_PREFIX = "[Deepgram STT]";
    const MAX_RECONNECT_ATTEMPTS = 5;
    const CONNECTION_TIMEOUT_MS = 1e4;

    // Label for the code path that flushed the accumulated transcript (used in logs).
    const TranscriptSource = {
      FinalTranscript: "Final transcript (from accumulated)",
      UtteranceEndFallback: "UtteranceEnd fallback",
      SafetyNet: "Safety net",
    };

    // Lifecycle of the recogniser.
    const State = {
      Idle: "Idle",
      Connecting: "Connecting",
      Connected: "Connected",
      Paused: "Paused",
      Disconnected: "Disconnected",
    };

    class DeepgramSTT {
      options;
      connection = null;
      state = State.Idle;
      shouldReconnect = true;
      microphone = null;
      stream = null;
      reconnectAttempts = 0;
      reconnectDelay = 1e3;
      reconnectTimeoutId = null;
      digitalHumanSpeaking = false;
      isUserCurrentlySpeaking = false;
      isUiShowingSpeaking = false;
      accumulatedTranscript = "";
      accumulatedConfidenceSum = 0;
      accumulatedWordCount = 0;
      lastDeepgramEventTime = 0;
      safetyNetTimeoutId = null;

      /**
       * Stores `options`, fills in defaults for every unset tuning field
       * (mutating the object passed in) and subscribes to `options.messages`.
       */
      constructor(options) {
        this.options = options;
        const opts = this.options;
        opts.model = opts.model || "nova-3";
        opts.language = opts.language || "en";
        opts.smartFormat = opts.smartFormat ?? true;
        opts.interimResults = opts.interimResults ?? true;
        opts.utteranceEndMs = opts.utteranceEndMs ?? 1500;
        opts.vadEvents = opts.vadEvents ?? true;
        opts.fillerWords = opts.fillerWords ?? false;
        opts.endpointing = opts.endpointing ?? 500;
        opts.echoCancellation = opts.echoCancellation ?? true;
        opts.noiseSuppression = opts.noiseSuppression ?? true;
        opts.autoGainControl = opts.autoGainControl ?? true;
        opts.interruptionWordThreshold = opts.interruptionWordThreshold ?? 3;
        opts.noDelay = opts.noDelay ?? false;
        opts.safetyNetTimeoutMs = opts.safetyNetTimeoutMs ?? 2e3;

        if (opts.safetyNetTimeoutMs <= 500) {
          // Too short to be usable: warn and fall back to the default.
          logging.A.warn(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Ignoring and default the value to 2000ms.`);
          opts.safetyNetTimeoutMs = 2e3;
        } else if (opts.safetyNetTimeoutMs <= 1e3) {
          logging.A.warn(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is 2000ms.`);
        } else {
          logging.A.info(`${LOG_PREFIX} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms.`);
        }

        this.handleAppMessages();
      }

      /** Enables auto-reconnect, resets the back-off and connects. */
      async startRecognition() {
        logging.A.info(`${LOG_PREFIX} Starting speech recognition`);
        this.shouldReconnect = true;
        this.resetReconnectionState();
        await this.connect();
      }

      /** Disables auto-reconnect, cancels any pending retry and disconnects. */
      async stopRecognition() {
        logging.A.info(`${LOG_PREFIX} Stopping speech recognition`);
        this.shouldReconnect = false;
        this.clearReconnectTimeout();
        await this.disconnect();
      }

      /**
       * Pauses recognition: drops any partial transcript, resets the speaking
       * flags and disables (but keeps) the audio tracks. Always resolves `true`.
       */
      async pause() {
        logging.A.info(`${LOG_PREFIX} Pausing speech recognition`);
        this.state = State.Paused;
        this.clearSafetyNetTimeout();
        this.resetAccumulatedState();
        this.resetSpeakingStates();
        if (this.stream) {
          this.stream.getTracks().forEach((track) => {
            track.enabled = false;
          });
          logging.A.debug(`${LOG_PREFIX} Audio tracks disabled`);
        }
        return true;
      }

      /**
       * Resumes after `pause()`. Re-enables the kept stream when there is one,
       * restarts the microphone when only the connection survived, and otherwise
       * falls through to a fresh `connect()`. Always resolves `true`.
       */
      async resume() {
        logging.A.info(`${LOG_PREFIX} Resuming speech recognition`);
        if (this.state === State.Paused) {
          if (this.stream) {
            this.state = State.Connected;
            this.stream.getTracks().forEach((track) => {
              track.enabled = true;
            });
            logging.A.debug(`${LOG_PREFIX} Audio tracks re-enabled`);
            return true;
          }
          if (this.connection) {
            this.state = State.Connected;
            await this.startMicrophone();
            return true;
          }
          // Neither stream nor connection is left: connect from scratch.
          this.state = State.Disconnected;
        }
        logging.A.debug(`${LOG_PREFIX} Initiating connection`);
        await this.connect();
        return true;
      }

      /** Replaces the metadata attached to outgoing chat prompts. */
      setChatMetadata(metadata) {
        this.options.promptMetadata = metadata;
      }

      /**
       * Fetches a token, opens the Deepgram connection (10 s timeout), wires the
       * event handlers and starts the microphone. `pause()` / `disconnect()` may
       * run while this method is awaiting, so the state is re-checked after the
       * socket opens. On failure a transient error is emitted and a reconnect is
       * scheduled when `shouldReconnect` is set.
       */
      async connect() {
        if (this.state === State.Connected) {
          logging.A.warn(`${LOG_PREFIX} Already connected`);
          return;
        }
        if (this.state === State.Connecting) {
          logging.A.warn(`${LOG_PREFIX} Connection already in progress`);
          return;
        }

        this.state = State.Connecting;
        try {
          const token = await this.getToken();
          logging.A.info(`${LOG_PREFIX} Connecting to Deepgram`);
          const client = new deepgram.c({ accessToken: token.token, baseUrl: token.api_url });
          const listenParams = {
            model: this.options.model,
            language: this.options.language,
            smart_format: String(this.options.smartFormat),
            interim_results: String(this.options.interimResults),
            utterance_end_ms: this.options.utteranceEndMs,
            vad_events: String(this.options.vadEvents),
            filler_words: String(this.options.fillerWords),
            endpointing: this.options.endpointing,
            mip_opt_out: "true",
            ...(this.options.keyterms && this.options.keyterms.length > 0 && { keyterm: this.options.keyterms }),
            ...(this.options.noDelay && { queryParams: { no_delay: "true" } }),
          };
          this.connection = await client.listen.v1.connect(listenParams);
          this.connection.connect();

          // Race the open against a timeout, and always clear the timer so it
          // does not linger for 10 s after a successful open.
          let timeoutId;
          try {
            await Promise.race([
              this.connection.waitForOpen(),
              new Promise((_resolve, reject) => {
                timeoutId = setTimeout(() => reject(new Error("Connection timeout")), CONNECTION_TIMEOUT_MS);
              }),
            ]);
          } finally {
            clearTimeout(timeoutId);
          }

          if (this.state !== State.Paused && this.state !== State.Disconnected) {
            this.state = State.Connected;
          }
          logging.A.info(`${LOG_PREFIX} Connection opened`);
          this.setupEventHandlers();

          if (this.state === State.Paused) {
            logging.A.info(`${LOG_PREFIX} Pause requested during connection — staying paused`);
            this.resetReconnectionState();
            return;
          }
          if (this.state === State.Disconnected) {
            logging.A.info(`${LOG_PREFIX} Recognition stopped during connection — aborting`);
            if (this.connection) {
              this.connection.close();
              this.connection = null;
            }
            return;
          }

          await this.startMicrophone();
          logging.A.info(`${LOG_PREFIX} Connected successfully`);
          this.resetReconnectionState();
        } catch (err) {
          this.state = State.Disconnected;
          logging.A.error(`${LOG_PREFIX} Connection error`, logging.A.serialiseError(err));
          if (this.shouldReconnect) {
            this.emitTransientError(err);
            this.scheduleReconnect();
          }
        }
      }

      /**
       * Stops the microphone, closes the connection and publishes
       * `new msgs.WY(false)`. Does nothing when idle, or when already
       * disconnected with no connection left to close.
       */
      async disconnect() {
        const nothingToDo =
          this.state === State.Idle || (this.state === State.Disconnected && !this.connection);
        if (nothingToDo) {
          return;
        }

        logging.A.info(`${LOG_PREFIX} Disconnecting`);
        this.clearSafetyNetTimeout();
        try {
          this.stopMicrophone();
          if (this.connection) {
            this.connection.close();
            this.connection = null;
          }
        } catch (err) {
          logging.A.error(`${LOG_PREFIX} Disconnect error`, logging.A.serialiseError(err));
        }
        this.resetAccumulatedState();
        this.resetSpeakingStates();
        this.state = State.Disconnected;
        this.clientMsgSend(new msgs.WY(false));
      }

      /**
       * Schedules the next `connect()` with exponential back-off (delay doubles,
       * capped at 30 s). After 5 attempts it gives up and publishes `msgs.Cj`.
       */
      scheduleReconnect() {
        if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
          logging.A.error(`${LOG_PREFIX} Max reconnection attempts (5) reached`);
          this.clientMsgSend(new msgs.Cj("Unable to connect to speech recognition service after 5 attempts"));
          return;
        }
        this.reconnectAttempts++;
        logging.A.info(`${LOG_PREFIX} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`);
        this.reconnectTimeoutId = setTimeout(() => {
          this.connect();
        }, this.reconnectDelay);
        this.reconnectDelay = Math.min(2 * this.reconnectDelay, 3e4);
      }

      /** Resets the attempt counter and back-off delay and cancels any pending retry. */
      resetReconnectionState() {
        this.reconnectAttempts = 0;
        this.reconnectDelay = 1e3;
        this.clearReconnectTimeout();
      }

      /** Cancels the pending reconnect timer, if any. */
      clearReconnectTimeout() {
        if (this.reconnectTimeoutId) {
          clearTimeout(this.reconnectTimeoutId);
          this.reconnectTimeoutId = null;
        }
      }

      /** Cancels the pending safety-net timer, if any. */
      clearSafetyNetTimeout() {
        if (this.safetyNetTimeoutId) {
          clearTimeout(this.safetyNetTimeoutId);
          this.safetyNetTimeoutId = null;
        }
      }

      /**
       * Requests a Deepgram token from `options.connectionUrl`, authenticated
       * with `options.jwtToken`.
       * @returns the parsed JSON body (`token` and `api_url` are read by `connect()`).
       * @throws Error when the response status is not OK.
       */
      async getToken() {
        const model = this.options.model || "nova-3";
        const url = `${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(model)}`;
        const response = await fetch(url, {
          method: "GET",
          headers: {
            Authorization: `Bearer ${this.options.jwtToken}`,
            "Content-Type": "application/json",
          },
        });
        if (!response.ok) {
          throw new Error(`Token fetch failed: ${response.status} ${response.statusText}`);
        }
        return await response.json();
      }

      /**
       * Acquires the microphone and starts streaming 250 ms opus chunks to the
       * connection. Failures are logged and published as `msgs.co`; they are not
       * rethrown.
       */
      async startMicrophone() {
        try {
          logging.A.info(`${LOG_PREFIX} Starting microphone`);
          this.stopMicrophone();
          if (!navigator.mediaDevices?.getUserMedia) {
            throw new Error("Microphone access is not available in this context");
          }
          this.stream = await navigator.mediaDevices.getUserMedia({
            audio: {
              deviceId: this.options.microphoneDeviceId ? { exact: this.options.microphoneDeviceId } : void 0,
              echoCancellation: this.options.echoCancellation,
              noiseSuppression: this.options.noiseSuppression,
              autoGainControl: this.options.autoGainControl,
            },
          });

          // The state may have changed while the permission prompt was open.
          if (this.state === State.Paused) {
            logging.A.info(`${LOG_PREFIX} Paused during getUserMedia — keeping stream but disabling tracks`);
            this.stream.getTracks().forEach((track) => {
              track.enabled = false;
            });
            return;
          }
          if (this.state === State.Disconnected) {
            logging.A.info(`${LOG_PREFIX} Recognition stopped during getUserMedia — releasing stream`);
            this.stream.getTracks().forEach((track) => {
              track.stop();
            });
            this.stream = null;
            return;
          }

          this.microphone = new MediaRecorder(this.stream, {
            mimeType: "audio/webm;codecs=opus",
            audioBitsPerSecond: 48e3,
          });
          this.microphone.ondataavailable = (event) => {
            // Only forward audio while connected; chunks recorded while paused
            // or disconnected are dropped.
            if (event.data.size > 0 && this.connection && this.state === State.Connected) {
              event.data
                .arrayBuffer()
                .then((buffer) => {
                  this.connection?.sendMedia(buffer);
                })
                .catch((err) => {
                  logging.A.error(`${LOG_PREFIX} Error converting audio data`, logging.A.serialiseError(err));
                });
            }
          };
          this.microphone.start(250);
          logging.A.debug(`${LOG_PREFIX} Microphone started`);
          this.clientMsgSend(new msgs.WY(true));
        } catch (err) {
          logging.A.error(`${LOG_PREFIX} Microphone error`, logging.A.serialiseError(err));
          // Forward the real error. JSON.stringify on an Error/DOMException
          // yields "{}" (name/message are not own enumerable properties), which
          // hid the actual cause such as a denied permission.
          const micError =
            err instanceof Error ? err : new Error(typeof err === "string" ? err : JSON.stringify(err));
          this.clientMsgSend(new msgs.co(micError));
        }
      }

      /** Stops the recorder (when recording) and releases every track of the stream. */
      stopMicrophone() {
        if (this.microphone && "recording" === this.microphone.state) {
          this.microphone.stop();
          this.microphone = null;
        }
        if (this.stream) {
          this.stream.getTracks().forEach((track) => {
            track.stop();
          });
          this.stream = null;
        }
        logging.A.debug(`${LOG_PREFIX} Microphone stopped`);
      }

      /** Attaches open/message/close/error handlers to the current connection. */
      setupEventHandlers() {
        if (!this.connection) {
          return;
        }
        this.connection.on("open", () => {
          this.handleConnectionOpen();
        });
        this.connection.on("message", (message) => {
          if (null === message || "object" != typeof message || !("type" in message)) {
            return;
          }
          if ("Results" === message.type) {
            this.handleTranscript(message);
          } else if ("UtteranceEnd" === message.type) {
            this.handleUtteranceEnd(message);
          } else if ("Metadata" === message.type) {
            logging.A.debug(`${LOG_PREFIX} Metadata`, message);
          }
        });
        this.connection.on("close", () => {
          this.handleConnectionClose();
        });
        this.connection.on("error", (err) => {
          logging.A.error(`${LOG_PREFIX} WebSocket error event`, logging.A.serialiseError(err));
          this.emitTransientError(err);
        });
      }

      /**
       * Subscribes to `options.messages` to track whether the digital human is
       * speaking and to react to session lifecycle messages.
       */
      handleAppMessages() {
        this.options.messages.subscribe((message) => {
          switch (message.uneeqMessageType) {
            case msgs.Yg.AvatarStartedSpeaking:
              this.digitalHumanSpeaking = true;
              break;
            case msgs.Yg.PromptResult:
              if (!message.promptResult.success) {
                this.handleSpeakingEnd();
              }
              break;
            case msgs.Yg.AvatarAnswer:
              // An answer that is empty once markup tags are stripped produces no speech.
              if ("" === message.answerSpeech.replace(/<[^>]*>/g, "")) {
                this.handleSpeakingEnd();
              }
              break;
            case msgs.Yg.AvatarStoppedSpeaking:
              this.handleSpeakingEnd();
              break;
            case msgs.Yg.SessionEnded:
              this.shouldReconnect = false;
              this.stopRecognition();
              break;
            case msgs.Yg.SessionReconnecting:
              this.handleSpeakingEnd();
              this.pause();
              break;
            case msgs.Yg.CustomMetadataUpdated:
              this.options.promptMetadata = message.chatMetadata;
              break;
            case msgs.Yg.SessionBackendError:
              this.handleSpeakingEnd();
          }
        });
      }

      /**
       * Handles a "Results" message: accumulates the chunk, updates the
       * speaking/interruption state, publishes the transcription and re-arms the
       * safety-net timer. Errors are logged, never thrown.
       */
      handleTranscript(result) {
        try {
          this.lastDeepgramEventTime = Date.now();
          const channel = result.channel;
          if (!channel?.alternatives || 0 === channel.alternatives.length) {
            return;
          }
          const best = channel.alternatives[0];
          const text = String(best.transcript || "");
          const isFinal = result.is_final;
          const speechFinal = result.speech_final;
          logging.A.debug(`${LOG_PREFIX} Transcript event: is_final=${isFinal}, speech_final=${speechFinal}, transcript="${text.substring(0, 50)}${text.length > 50 ? "..." : ""}", accumulated="${this.accumulatedTranscript.substring(0, 30)}${this.accumulatedTranscript.length > 30 ? "..." : ""}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`);

          if ("" === text && speechFinal) {
            // End of speech without new text: flush what was accumulated, but
            // only when it already reads as a finished sentence.
            if (this.endsWithPunctuation(this.accumulatedTranscript)) {
              this.sendAccumulatedTranscript(TranscriptSource.FinalTranscript);
              this.resetSpeakingStates();
            } else {
              logging.A.debug(`${LOG_PREFIX} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`);
            }
            return;
          }
          if ("" === text) {
            return;
          }

          this.processTranscriptChunk(text, best.confidence, isFinal, speechFinal);
          this.updateSpeakingAndInterruptionState(text, speechFinal);
          this.emitTranscriptionResult(text, best.confidence, speechFinal);
          this.resetSafetyNetTimeout();
        } catch (err) {
          logging.A.error(`${LOG_PREFIX} Error processing transcript`, logging.A.serialiseError(err));
        }
      }

      /**
       * Appends a chunk that is final but does not end the speech to the
       * accumulated transcript, weighting its confidence by word count.
       */
      processTranscriptChunk(text, confidence, isFinal, speechFinal) {
        if (!isFinal || speechFinal) {
          return;
        }
        if ("" !== this.accumulatedTranscript) {
          this.accumulatedTranscript += " ";
        }
        this.accumulatedTranscript += text;
        const wordCount = text.trim().split(/\s+/).length;
        const chunkConfidence = confidence ?? 1;
        this.accumulatedConfidenceSum += chunkConfidence * wordCount;
        this.accumulatedWordCount += wordCount;
        logging.A.debug(`${LOG_PREFIX} Accumulated transcript: "${this.accumulatedTranscript}"`);
      }

      /**
       * Raises/clears the "user speaking" flags and interrupts the digital human
       * once the user has said at least `interruptionWordThreshold` words.
       */
      updateSpeakingAndInterruptionState(text, speechFinal) {
        const spoken = "" !== this.accumulatedTranscript ? this.accumulatedTranscript : text;
        const wordCount = spoken.trim().split(/\s+/).length;
        const threshold = this.options.interruptionWordThreshold ?? 3;

        if (!this.isUiShowingSpeaking) {
          this.isUiShowingSpeaking = true;
          this.clientMsgSend(new msgs._4());
        }

        const mayStartSpeaking = !this.digitalHumanSpeaking || wordCount >= threshold;
        const combined = "" !== this.accumulatedTranscript ? this.accumulatedTranscript + " " + text : text;
        const utteranceComplete = speechFinal && this.endsWithPunctuation(combined);

        if (!this.isUserCurrentlySpeaking && mayStartSpeaking) {
          this.isUserCurrentlySpeaking = true;
          this.dataChannelMsgSend(new speaking.A(speaking.f.Start));
        }
        if (this.digitalHumanSpeaking && wordCount >= threshold) {
          logging.A.debug(`${LOG_PREFIX} Interrupting digital human`);
          this.dataChannelMsgSend(new interrupt.f());
          this.clientMsgSend(new msgs.tc());
          this.digitalHumanSpeaking = false;
        }
        if (this.isUserCurrentlySpeaking && utteranceComplete) {
          this.isUserCurrentlySpeaking = false;
          this.dataChannelMsgSend(new speaking.A(speaking.f.Stop));
        }
        if (this.isUiShowingSpeaking && utteranceComplete) {
          this.isUiShowingSpeaking = false;
          this.clientMsgSend(new msgs.im());
        }
      }

      /**
       * Publishes the transcription result. A speech-final chunk that ends in
       * punctuation is sent as a chat prompt (unless it is too short to count as
       * an interruption while the digital human speaks); one without punctuation
       * is accumulated for the safety net.
       */
      emitTranscriptionResult(text, confidence, speechFinal) {
        const combined = "" !== this.accumulatedTranscript ? this.accumulatedTranscript + " " + text : text;
        const utteranceComplete = speechFinal && this.endsWithPunctuation(combined);
        const payload = {
          transcript: speechFinal ? combined : text,
          final: utteranceComplete,
          confidence: confidence ?? 1,
          language_code: this.options.language || "",
        };
        this.clientMsgSend(new msgs.Ux(payload));

        if (utteranceComplete) {
          const wordCount = combined.trim().split(/\s+/).length;
          const threshold = this.options.interruptionWordThreshold ?? 3;
          if (this.digitalHumanSpeaking && wordCount < threshold) {
            logging.A.debug(`${LOG_PREFIX} Discarding utterance during speaking (${wordCount} words < ${threshold} threshold): "${combined}"`);
          } else {
            logging.A.info(`${LOG_PREFIX} Final transcript: "${combined}"`);
            this.sendChatPrompt(combined);
          }
          this.resetAccumulatedState();
        } else if (speechFinal) {
          if ("" !== this.accumulatedTranscript) {
            this.accumulatedTranscript += " ";
          }
          this.accumulatedTranscript += text;
          const wordCount = text.trim().split(/\s+/).length;
          this.accumulatedConfidenceSum += (confidence ?? 1) * wordCount;
          this.accumulatedWordCount += wordCount;
          logging.A.debug(`${LOG_PREFIX} speech_final without punctuation, accumulated for safety net: "${this.accumulatedTranscript}"`);
        }
      }

      /**
       * Handles an "UtteranceEnd" message: flushes a punctuated accumulated
       * transcript, keeps waiting for an unpunctuated one, and just resets the
       * speaking flags when nothing was accumulated.
       */
      handleUtteranceEnd(event) {
        this.lastDeepgramEventTime = Date.now();
        logging.A.debug(`${LOG_PREFIX} UtteranceEnd event received: last_word_end=${event?.last_word_end}, accumulated="${this.accumulatedTranscript.substring(0, 50)}...", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`);

        if ("" === this.accumulatedTranscript.trim()) {
          this.clearSafetyNetTimeout();
          this.resetSpeakingStates();
          logging.A.debug(`${LOG_PREFIX} UtteranceEnd: no transcript, reset speaking states`);
          return;
        }
        if (!this.endsWithPunctuation(this.accumulatedTranscript)) {
          logging.A.debug(`${LOG_PREFIX} UtteranceEnd: no punctuation, waiting for more speech or safety net`);
          return;
        }
        this.clearSafetyNetTimeout();
        logging.A.debug(`${LOG_PREFIX} UtteranceEnd fallback triggered with transcript: "${this.accumulatedTranscript}"`);
        this.sendAccumulatedTranscript(TranscriptSource.UtteranceEndFallback);
        this.resetSpeakingStates();
        logging.A.debug(`${LOG_PREFIX} UtteranceEnd: reset speaking states`);
      }

      /**
       * Re-arms the safety-net timer, but only while a speaking flag is set and
       * there is accumulated text that could otherwise be stranded.
       */
      resetSafetyNetTimeout() {
        this.clearSafetyNetTimeout();
        const speakingNow = this.isUiShowingSpeaking || this.isUserCurrentlySpeaking;
        if (speakingNow && "" !== this.accumulatedTranscript.trim()) {
          this.safetyNetTimeoutId = setTimeout(() => {
            this.triggerSafetyNet();
          }, this.options.safetyNetTimeoutMs);
        }
      }

      /** Timer callback: force-flushes the accumulated transcript and resets the speaking flags. */
      triggerSafetyNet() {
        logging.A.warn(`${LOG_PREFIX} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`);
        logging.A.debug(`${LOG_PREFIX} Safety net triggered: accumulated="${this.accumulatedTranscript}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, timeSinceLastEvent=${Date.now() - this.lastDeepgramEventTime}ms`);
        this.sendAccumulatedTranscript(TranscriptSource.SafetyNet);
        this.resetSpeakingStates();
        this.safetyNetTimeoutId = null;
      }

      /**
       * Publishes the accumulated transcript as final and sends it as a chat
       * prompt. Only the safety net may flush text that lacks end punctuation.
       * @param source one of the `TranscriptSource` labels (used in logs).
       */
      sendAccumulatedTranscript(source) {
        if ("" === this.accumulatedTranscript.trim()) {
          return;
        }
        if (source !== TranscriptSource.SafetyNet && !this.endsWithPunctuation(this.accumulatedTranscript)) {
          logging.A.debug(`${LOG_PREFIX} sendAccumulatedTranscript: not sending as source is ${source} and transcript doesn't end in punctuation: "${this.accumulatedTranscript}"`);
          return;
        }

        const threshold = this.options.interruptionWordThreshold ?? 3;
        // Word-weighted mean confidence of the accumulated chunks.
        const meanConfidence =
          this.accumulatedWordCount > 0 ? this.accumulatedConfidenceSum / this.accumulatedWordCount : 1;
        const payload = {
          transcript: this.accumulatedTranscript,
          final: true,
          confidence: meanConfidence,
          language_code: this.options.language || "",
        };
        this.clientMsgSend(new msgs.Ux(payload));

        const wordCount = this.accumulatedTranscript.trim().split(/\s+/).length;
        if (this.digitalHumanSpeaking && wordCount < threshold) {
          logging.A.debug(`${LOG_PREFIX} Discarding utterance during speaking (${wordCount} words < ${threshold} threshold): "${this.accumulatedTranscript}"`);
        } else {
          logging.A.info(`${LOG_PREFIX} ${source}: "${this.accumulatedTranscript}"`);
          this.sendChatPrompt(this.accumulatedTranscript);
        }
        this.resetAccumulatedState();
      }

      /** True when the trimmed text is non-empty and ends with one of `. ! ? ; :`. */
      endsWithPunctuation(text) {
        const trimmed = text.trim();
        return 0 !== trimmed.length && /[.!?;:]$/.test(trimmed);
      }

      /** Clears the accumulated transcript and its confidence bookkeeping. */
      resetAccumulatedState() {
        this.accumulatedTranscript = "";
        this.accumulatedConfidenceSum = 0;
        this.accumulatedWordCount = 0;
      }

      /** Clears both speaking flags, sending the matching stop message for each one that was set. */
      resetSpeakingStates() {
        if (this.isUserCurrentlySpeaking) {
          this.isUserCurrentlySpeaking = false;
          this.dataChannelMsgSend(new speaking.A(speaking.f.Stop));
        }
        if (this.isUiShowingSpeaking) {
          this.isUiShowingSpeaking = false;
          this.clientMsgSend(new msgs.im());
        }
      }

      /** "open" handler: marks the recogniser connected unless it is paused. */
      handleConnectionOpen() {
        if (this.state !== State.Paused) {
          this.state = State.Connected;
        }
      }

      /**
       * "close" handler. While paused the socket is simply dropped (resume()
       * reconnects); otherwise the close is treated as unexpected and a
       * reconnect is scheduled when `shouldReconnect` is set.
       */
      handleConnectionClose() {
        logging.A.info(`${LOG_PREFIX} Connection closed`);
        if (this.state === State.Paused) {
          logging.A.info(`${LOG_PREFIX} Connection closed while paused — will reconnect on resume`);
          this.connection = null;
          this.stopMicrophone();
          return;
        }
        this.state = State.Disconnected;
        this.clientMsgSend(new msgs.WY(false));
        if (this.shouldReconnect) {
          logging.A.info(`${LOG_PREFIX} Unexpected disconnect, attempting reconnection...`);
          this.scheduleReconnect();
        }
      }

      /** Sends non-blank text as a chat prompt, tagging the metadata with the spoken language. */
      sendChatPrompt(text) {
        if (!text || "" === text.trim()) {
          return;
        }
        if (this.options.language) {
          this.options.promptMetadata.userSpokenLocale = this.options.language;
        }
        this.dataChannelMsgSend(new prompt.D(text, this.options.promptMetadata));
      }

      /** Records that the digital human is no longer speaking. */
      handleSpeakingEnd() {
        this.digitalHumanSpeaking = false;
      }

      /** Publishes `msgs.fP` carrying the error's message (or its string form). */
      emitTransientError(err) {
        const text = err instanceof Error ? err.message : String(err);
        this.clientMsgSend(new msgs.fP(text));
      }

      /** Sends a message through `options.sendMessage`. */
      dataChannelMsgSend(message) {
        this.options.sendMessage(message);
      }

      /** Publishes a message on `options.messages`. */
      clientMsgSend(message) {
        this.options.messages.next(message);
      }
    }
  },
}]);
|
|
1
|
+
"use strict";(Object("undefined"!=typeof self?self:this).webpackChunkUneeq=Object("undefined"!=typeof self?self:this).webpackChunkUneeq||[]).push([[3],{3(t,e,i){i.d(e,{DeepgramSTT:()=>p});var n=i(514),s=i(838),a=i(33),o=i(388),r=i(58),c=i(622);const h="[Deepgram STT]";var u,d;!function(t){t.FinalTranscript="Final transcript (from accumulated)",t.UtteranceEndFallback="UtteranceEnd fallback",t.SafetyNet="Safety net"}(u||(u={})),function(t){t.Idle="Idle",t.Connecting="Connecting",t.Connected="Connected",t.Paused="Paused",t.Disconnected="Disconnected"}(d||(d={}));class p{options;connection=null;state=d.Idle;shouldReconnect=!0;microphone=null;stream=null;reconnectAttempts=0;reconnectDelay=1e3;reconnectTimeoutId=null;digitalHumanSpeaking=!1;isUserCurrentlySpeaking=!1;isUiShowingSpeaking=!1;accumulatedTranscript="";accumulatedConfidenceSum=0;accumulatedWordCount=0;lastDeepgramEventTime=0;safetyNetTimeoutId=null;constructor(t){this.options=t,this.options.model=this.options.model||"nova-3",this.options.language=this.options.language||"en",this.options.smartFormat=this.options.smartFormat??!0,this.options.interimResults=this.options.interimResults??!0,this.options.utteranceEndMs=this.options.utteranceEndMs??1500,this.options.vadEvents=this.options.vadEvents??!0,this.options.fillerWords=this.options.fillerWords??!1,this.options.endpointing=this.options.endpointing??500,this.options.echoCancellation=this.options.echoCancellation??!0,this.options.noiseSuppression=this.options.noiseSuppression??!0,this.options.autoGainControl=this.options.autoGainControl??!0,this.options.interruptionWordThreshold=this.options.interruptionWordThreshold??3,this.options.noDelay=this.options.noDelay??!1,this.options.safetyNetTimeoutMs=this.options.safetyNetTimeoutMs??2e3,this.options.safetyNetTimeoutMs<=500?(n.A.warn(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. 
Ignoring and default the value to 2000ms.`),this.options.safetyNetTimeoutMs=2e3):this.options.safetyNetTimeoutMs<=1e3?n.A.warn(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms. This is very short and may cause premature transcript finalization. Recommended value is 2000ms.`):n.A.info(`${h} safetyNetTimeoutMs is set to ${this.options.safetyNetTimeoutMs}ms.`),this.handleAppMessages()}async startRecognition(){n.A.info(`${h} Starting speech recognition`),this.shouldReconnect=!0,this.resetReconnectionState(),await this.connect()}async stopRecognition(){n.A.info(`${h} Stopping speech recognition`),this.shouldReconnect=!1,this.clearReconnectTimeout(),await this.disconnect()}async pause(){return n.A.info(`${h} Pausing speech recognition`),this.state=d.Paused,this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),this.resetAccumulatedState(),this.resetSpeakingStates(),this.stream&&(this.stream.getTracks().forEach(t=>{t.enabled=!1}),n.A.debug(`${h} Audio tracks disabled`)),!0}async resume(){if(n.A.info(`${h} Resuming speech recognition`),this.state===d.Paused){if(this.stream)return this.state=d.Connected,this.stream.getTracks().forEach(t=>{t.enabled=!0}),n.A.debug(`${h} Audio tracks re-enabled`),!0;if(this.connection)return this.state=d.Connected,await this.startMicrophone(),!0;this.state=d.Disconnected}return n.A.debug(`${h} Initiating connection`),await this.connect(),!0}setChatMetadata(t){this.options.promptMetadata=t}async connect(){if(this.state!==d.Connected)if(this.state!==d.Connecting){this.state=d.Connecting;try{const t=await this.getToken();n.A.info(`${h} Connecting to Deepgram`);const e=new 
c.c({accessToken:t.token,baseUrl:t.api_url}),i={model:this.options.model,language:this.options.language,smart_format:String(this.options.smartFormat),interim_results:String(this.options.interimResults),utterance_end_ms:this.options.utteranceEndMs,vad_events:String(this.options.vadEvents),filler_words:String(this.options.fillerWords),endpointing:this.options.endpointing,mip_opt_out:"true",...this.options.keyterms&&this.options.keyterms.length>0&&{keyterm:this.options.keyterms},...this.options.noDelay&&{queryParams:{no_delay:"true"}}},s=e.listen;if(this.connection=await s.v1.connect(i),this.connection.connect(),await Promise.race([this.connection.waitForOpen(),new Promise((t,e)=>setTimeout(()=>e(new Error("Connection timeout")),1e4))]),this.state!==d.Paused&&this.state!==d.Disconnected&&(this.state=d.Connected),n.A.info(`${h} Connection opened`),this.setupEventHandlers(),this.state===d.Paused)return n.A.info(`${h} Pause requested during connection — staying paused`),void this.resetReconnectionState();if(this.state===d.Disconnected)return n.A.info(`${h} Recognition stopped during connection — aborting`),void(this.connection&&(this.connection.close(),this.connection=null));await this.startMicrophone(),n.A.info(`${h} Connected successfully`),this.resetReconnectionState()}catch(t){this.state=d.Disconnected,n.A.error(`${h} Connection error`,n.A.serialiseError(t)),this.shouldReconnect&&(this.emitTransientError(t),this.scheduleReconnect())}}else n.A.warn(`${h} Connection already in progress`);else n.A.warn(`${h} Already connected`)}async disconnect(){if(this.state!==d.Idle&&(this.state!==d.Disconnected||this.connection)){n.A.info(`${h} Disconnecting`),this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null);try{this.stopMicrophone(),this.connection&&(this.connection.close(),this.connection=null)}catch(t){n.A.error(`${h} Disconnect 
error`,n.A.serialiseError(t))}this.resetAccumulatedState(),this.resetSpeakingStates(),this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1))}}scheduleReconnect(){if(this.reconnectAttempts>=5)return n.A.error(`${h} Max reconnection attempts (5) reached`),void this.clientMsgSend(new s.Cj("Unable to connect to speech recognition service after 5 attempts"));this.reconnectAttempts++,n.A.info(`${h} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`),this.reconnectTimeoutId=setTimeout(()=>{this.connect()},this.reconnectDelay),this.reconnectDelay=Math.min(2*this.reconnectDelay,3e4)}resetReconnectionState(){this.reconnectAttempts=0,this.reconnectDelay=1e3,this.clearReconnectTimeout()}clearReconnectTimeout(){this.reconnectTimeoutId&&(clearTimeout(this.reconnectTimeoutId),this.reconnectTimeoutId=null)}async getToken(){const t=this.options.model||"nova-3",e=`${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(t)}`,i=await fetch(e,{method:"GET",headers:{Authorization:`Bearer ${this.options.jwtToken}`,"Content-Type":"application/json"}});if(!i.ok)throw new Error(`Token fetch failed: ${i.status} ${i.statusText}`);return await i.json()}async startMicrophone(){try{if(n.A.info(`${h} Starting microphone`),this.stopMicrophone(),!navigator.mediaDevices?.getUserMedia)throw new Error("Microphone access is not available in this context");if(this.stream=await navigator.mediaDevices.getUserMedia({audio:{deviceId:this.options.microphoneDeviceId?{exact:this.options.microphoneDeviceId}:void 0,echoCancellation:this.options.echoCancellation,noiseSuppression:this.options.noiseSuppression,autoGainControl:this.options.autoGainControl}}),this.state===d.Paused)return n.A.info(`${h} Paused during getUserMedia — keeping stream but disabling tracks`),void this.stream.getTracks().forEach(t=>{t.enabled=!1});if(this.state===d.Disconnected)return n.A.info(`${h} Recognition stopped during getUserMedia — releasing 
stream`),this.stream.getTracks().forEach(t=>{t.stop()}),void(this.stream=null);this.microphone=new MediaRecorder(this.stream,{mimeType:"audio/webm;codecs=opus",audioBitsPerSecond:48e3}),this.microphone.ondataavailable=t=>{t.data.size>0&&this.connection&&this.state===d.Connected&&t.data.arrayBuffer().then(t=>{this.connection?.sendMedia(t)}).catch(t=>{n.A.error(`${h} Error converting audio data`,n.A.serialiseError(t))})},this.microphone.start(250),n.A.debug(`${h} Microphone started`),this.clientMsgSend(new s.WY(!0))}catch(t){n.A.error(`${h} Microphone error`,n.A.serialiseError(t)),this.clientMsgSend(new s.co(new Error(JSON.stringify(t))))}}stopMicrophone(){this.microphone&&"recording"===this.microphone.state&&(this.microphone.stop(),this.microphone=null),this.stream&&(this.stream.getTracks().forEach(t=>{t.stop()}),this.stream=null),n.A.debug(`${h} Microphone stopped`)}setupEventHandlers(){this.connection&&(this.connection.on("open",()=>{this.handleConnectionOpen()}),this.connection.on("message",t=>{if(null!==t&&"object"==typeof t&&"type"in t){const e=t;"Results"===e.type?this.handleTranscript(t):"UtteranceEnd"===e.type?this.handleUtteranceEnd(t):"Metadata"===e.type&&n.A.debug(`${h} Metadata`,t)}}),this.connection.on("close",()=>{this.handleConnectionClose()}),this.connection.on("error",t=>{n.A.error(`${h} WebSocket error event`,n.A.serialiseError(t)),this.emitTransientError(t)}))}handleAppMessages(){this.options.messages.subscribe(t=>{switch(t.uneeqMessageType){case s.Yg.AvatarStartedSpeaking:this.digitalHumanSpeaking=!0;break;case s.Yg.PromptResult:t.promptResult.success||this.handleSpeakingEnd();break;case s.Yg.AvatarAnswer:""===t.answerSpeech.replace(/<[^>]*>/g,"")&&this.handleSpeakingEnd();break;case s.Yg.AvatarStoppedSpeaking:this.handleSpeakingEnd();break;case s.Yg.SessionEnded:this.shouldReconnect=!1,this.stopRecognition();break;case s.Yg.SessionReconnecting:this.handleSpeakingEnd(),this.pause();break;case 
s.Yg.CustomMetadataUpdated:this.options.promptMetadata=t.chatMetadata;break;case s.Yg.SessionBackendError:this.handleSpeakingEnd()}})}handleTranscript(t){try{this.lastDeepgramEventTime=Date.now();const e=t.channel;if(!e?.alternatives||0===e.alternatives.length)return;const i=e.alternatives[0],s=String(i.transcript||""),a=t.is_final,o=t.speech_final;if(n.A.debug(`${h} Transcript event: is_final=${a}, speech_final=${o}, transcript="${s.substring(0,50)}${s.length>50?"...":""}", accumulated="${this.accumulatedTranscript.substring(0,30)}${this.accumulatedTranscript.length>30?"...":""}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`),""===s&&o)return void(this.endsWithPunctuation(this.accumulatedTranscript)?(this.sendAccumulatedTranscript(u.FinalTranscript),this.resetSpeakingStates()):n.A.debug(`${h} speech_final with empty transcript but no punctuation, waiting for more speech or safety net`));if(""===s)return;this.processTranscriptChunk(s,i.confidence,a,o),this.updateSpeakingAndInterruptionState(s,o),this.emitTranscriptionResult(s,i.confidence,o),this.resetSafetyNetTimeout()}catch(t){n.A.error(`${h} Error processing transcript`,n.A.serialiseError(t))}}processTranscriptChunk(t,e,i,s){if(i&&!s){""!==this.accumulatedTranscript&&(this.accumulatedTranscript+=" "),this.accumulatedTranscript+=t;const i=t.trim().split(/\s+/).length,s=e??1;this.accumulatedConfidenceSum+=s*i,this.accumulatedWordCount+=i,n.A.debug(`${h} Accumulated transcript: "${this.accumulatedTranscript}"`)}}updateSpeakingAndInterruptionState(t,e){const i=(""!==this.accumulatedTranscript?this.accumulatedTranscript:t).trim().split(/\s+/).length,o=this.options.interruptionWordThreshold??3;this.isUiShowingSpeaking||(this.isUiShowingSpeaking=!0,this.clientMsgSend(new s._4));const c=!this.digitalHumanSpeaking||i>=o,u=""!==this.accumulatedTranscript?this.accumulatedTranscript+" 
"+t:t,d=e&&this.endsWithPunctuation(u);!this.isUserCurrentlySpeaking&&c&&(this.isUserCurrentlySpeaking=!0,this.dataChannelMsgSend(new a.A(a.f.Start))),this.digitalHumanSpeaking&&i>=o&&(n.A.debug(`${h} Interrupting digital human`),this.dataChannelMsgSend(new r.f),this.clientMsgSend(new s.tc),this.digitalHumanSpeaking=!1),this.isUserCurrentlySpeaking&&d&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new a.A(a.f.Stop))),this.isUiShowingSpeaking&&d&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}emitTranscriptionResult(t,e,i){const a=""!==this.accumulatedTranscript?this.accumulatedTranscript+" "+t:t,o=i&&this.endsWithPunctuation(a),r={transcript:i?a:t,final:o,confidence:e??1,language_code:this.options.language||""};if(this.clientMsgSend(new s.Ux(r)),o){const t=a.trim().split(/\s+/).length,e=this.options.interruptionWordThreshold??3;this.digitalHumanSpeaking&&t<e?n.A.debug(`${h} Discarding utterance during speaking (${t} words < ${e} threshold): "${a}"`):(n.A.info(`${h} Final transcript: "${a}"`),this.sendChatPrompt(a)),this.resetAccumulatedState()}else if(i){""!==this.accumulatedTranscript&&(this.accumulatedTranscript+=" "),this.accumulatedTranscript+=t;const i=t.trim().split(/\s+/).length;this.accumulatedConfidenceSum+=(e??1)*i,this.accumulatedWordCount+=i,n.A.debug(`${h} speech_final without punctuation, accumulated for safety net: "${this.accumulatedTranscript}"`)}}handleUtteranceEnd(t){this.lastDeepgramEventTime=Date.now(),n.A.debug(`${h} UtteranceEnd event received: last_word_end=${t?.last_word_end}, accumulated="${this.accumulatedTranscript.substring(0,50)}...", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}`),""!==this.accumulatedTranscript.trim()?this.endsWithPunctuation(this.accumulatedTranscript)?(this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),n.A.debug(`${h} UtteranceEnd fallback triggered with transcript: 
"${this.accumulatedTranscript}"`),this.sendAccumulatedTranscript(u.UtteranceEndFallback),this.resetSpeakingStates(),n.A.debug(`${h} UtteranceEnd: reset speaking states`)):n.A.debug(`${h} UtteranceEnd: no punctuation, waiting for more speech or safety net`):(this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),this.resetSpeakingStates(),n.A.debug(`${h} UtteranceEnd: no transcript, reset speaking states`))}resetSafetyNetTimeout(){this.safetyNetTimeoutId&&(clearTimeout(this.safetyNetTimeoutId),this.safetyNetTimeoutId=null),(this.isUiShowingSpeaking||this.isUserCurrentlySpeaking)&&""!==this.accumulatedTranscript.trim()&&(this.safetyNetTimeoutId=setTimeout(()=>{this.triggerSafetyNet()},this.options.safetyNetTimeoutMs))}triggerSafetyNet(){n.A.warn(`${h} Safety net triggered: no Deepgram events for ${this.options.safetyNetTimeoutMs}ms while speaking`),n.A.debug(`${h} Safety net triggered: accumulated="${this.accumulatedTranscript}", uiSpeaking=${this.isUiShowingSpeaking}, userSpeaking=${this.isUserCurrentlySpeaking}, timeSinceLastEvent=${Date.now()-this.lastDeepgramEventTime}ms`),this.sendAccumulatedTranscript(u.SafetyNet),this.resetSpeakingStates(),this.safetyNetTimeoutId=null}sendAccumulatedTranscript(t){if(""===this.accumulatedTranscript.trim())return;if(t!==u.SafetyNet&&!this.endsWithPunctuation(this.accumulatedTranscript))return void n.A.debug(`${h} sendAccumulatedTranscript: not sending as source is ${t} and transcript doesn't end in punctuation: "${this.accumulatedTranscript}"`);const e=this.options.interruptionWordThreshold??3,i=this.accumulatedWordCount>0?this.accumulatedConfidenceSum/this.accumulatedWordCount:1,a={transcript:this.accumulatedTranscript,final:!0,confidence:i,language_code:this.options.language||""};this.clientMsgSend(new s.Ux(a));const o=this.accumulatedTranscript.trim().split(/\s+/).length;this.digitalHumanSpeaking&&o<e?n.A.debug(`${h} Discarding utterance during speaking (${o} words < ${e} threshold): 
"${this.accumulatedTranscript}"`):(n.A.info(`${h} ${t}: "${this.accumulatedTranscript}"`),this.sendChatPrompt(this.accumulatedTranscript)),this.resetAccumulatedState()}endsWithPunctuation(t){const e=t.trim();return 0!==e.length&&/[.!?;:]$/.test(e)}resetAccumulatedState(){this.accumulatedTranscript="",this.accumulatedConfidenceSum=0,this.accumulatedWordCount=0}resetSpeakingStates(){this.isUserCurrentlySpeaking&&(this.isUserCurrentlySpeaking=!1,this.dataChannelMsgSend(new a.A(a.f.Stop))),this.isUiShowingSpeaking&&(this.isUiShowingSpeaking=!1,this.clientMsgSend(new s.im))}handleConnectionOpen(){this.state!==d.Paused&&(this.state=d.Connected)}handleConnectionClose(){if(n.A.info(`${h} Connection closed`),this.state===d.Paused)return n.A.info(`${h} Connection closed while paused — will reconnect on resume`),this.connection=null,void this.stopMicrophone();this.state=d.Disconnected,this.clientMsgSend(new s.WY(!1)),this.shouldReconnect&&(n.A.info(`${h} Unexpected disconnect, attempting reconnection...`),this.scheduleReconnect())}sendChatPrompt(t){t&&""!==t.trim()&&(this.options.language&&(this.options.promptMetadata.userSpokenLocale=this.options.language),this.dataChannelMsgSend(new o.D(t,this.options.promptMetadata)))}handleSpeakingEnd(){this.digitalHumanSpeaking=!1}emitTransientError(t){const e=t instanceof Error?t.message:String(t);this.clientMsgSend(new s.fP(e))}dataChannelMsgSend(t){this.options.sendMessage(t)}clientMsgSend(t){this.options.messages.next(t)}}}}]);
|
|
2
2
|
//# sourceMappingURL=3.index.js.map
|
package/dist/363.index.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
/**
 * Webpack async chunk 363: the Deepgram "Flux" (listen v2) speech-to-text
 * provider, exported as `DeepgramFluxSTT`.
 *
 * The class captures microphone audio, converts it to 16 kHz linear16 PCM in
 * an AudioWorklet, streams it over a Deepgram v2 socket and turns the
 * resulting `TurnInfo` events into client messages (via `options.messages`)
 * and data-channel messages (via `options.sendMessage`).
 *
 * NOTE(review): this is bundler output. The numeric module ids and the
 * one-letter export names (`.A`, `.D`, `.f`, `.g`, `.c`, `.WY`, ...) are
 * produced by the minifier; the local aliases below are named after how the
 * class uses them, not after their (unseen) source names.
 */
"use strict";
(Object("undefined" != typeof self ? self : this).webpackChunkUneeq =
  Object("undefined" != typeof self ? self : this).webpackChunkUneeq || []).push([
  [363],
  {
    363(__unused_module, __webpack_exports__, __webpack_require__) {
      __webpack_require__.d(__webpack_exports__, { DeepgramFluxSTT: () => DeepgramFluxSTT });

      // Bundled dependencies, aliased by observed usage.
      const log = __webpack_require__(514); // .A.debug/info/warn/error/serialiseError
      const msgs = __webpack_require__(838); // client message classes + .Yg message-type enum
      const speakingSignal = __webpack_require__(33); // .A(.f.Start | .f.Stop) data-channel message
      const chatPrompt = __webpack_require__(388); // .D(prompt, metadata, requestId, flag)
      const stopSpeaking = __webpack_require__(58); // .f — sent to interrupt the avatar
      const requestIds = __webpack_require__(1); // .g() — creates a request id
      const deepgram = __webpack_require__(260); // .c — Deepgram client constructor

      const TAG = "[Deepgram Flux STT]";

      const SAMPLE_RATE_HZ = 16e3;
      const CONNECT_TIMEOUT_MS = 1e4;
      const MAX_RECONNECT_ATTEMPTS = 5;
      const INITIAL_RECONNECT_DELAY_MS = 1e3;
      const MAX_RECONNECT_DELAY_MS = 3e4;
      /** Fewer words than this while the avatar is speaking is treated as a backchannel. */
      const MIN_BARGE_IN_WORDS = 3;

      /**
       * AudioWorklet source. Buffers 1280 float samples (80 ms at 16 kHz),
       * converts them to signed 16-bit PCM and transfers the buffer to the
       * main thread.
       */
      const PCM_WORKLET_SOURCE = [
        "",
        "class PcmCaptureProcessor extends AudioWorkletProcessor {",
        "  constructor() {",
        "    super()",
        "    this._buffer = new Float32Array(1280)",
        "    this._offset = 0",
        "  }",
        "  process(inputs, outputs, parameters) {",
        "    const input = inputs[0]?.[0]",
        "    if (!input) return true",
        "    for (let i = 0; i < input.length; i++) {",
        "      this._buffer[this._offset++] = input[i]",
        "      if (this._offset >= this._buffer.length) {",
        "        const int16 = new Int16Array(this._buffer.length)",
        "        for (let j = 0; j < this._buffer.length; j++) {",
        "          const s = Math.max(-1, Math.min(1, this._buffer[j]))",
        "          int16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF",
        "        }",
        "        this.port.postMessage(int16.buffer, [int16.buffer])",
        "        this._offset = 0",
        "      }",
        "    }",
        "    return true",
        "  }",
        "}",
        "registerProcessor('pcm-capture-processor', PcmCaptureProcessor)",
        "",
      ].join("\n");

      /** Lifecycle states of the recogniser. */
      let State;
      (function (states) {
        states.Idle = "Idle";
        states.Connecting = "Connecting";
        states.Connected = "Connected";
        states.Paused = "Paused";
        states.Disconnected = "Disconnected";
      })(State || (State = {}));

      /**
       * Normalises a caught value to an Error without losing its message.
       * `JSON.stringify` on an Error/DOMException yields "{}", so real Error
       * instances are passed through untouched.
       */
      function toError(value) {
        if (value instanceof Error) return value;
        if (typeof value === "string") return new Error(value);
        try {
          return new Error(JSON.stringify(value));
        } catch {
          return new Error(String(value));
        }
      }

      class DeepgramFluxSTT {
        options;
        connection = null;
        state = State.Idle;
        shouldReconnect = true;
        stream = null;
        audioContext = null;
        workletNode = null;
        reconnectAttempts = 0;
        reconnectDelay = INITIAL_RECONNECT_DELAY_MS;
        reconnectTimeoutId = null;
        digitalHumanSpeaking = false;
        pendingPromptRequest = null;
        isUserCurrentlySpeaking = false;
        isUiShowingSpeaking = false;
        eagerPromptSentForTurn = false;
        cancelledEagerRequestId = void 0;
        turnStartedAt = null;
        safetyNetTimeoutId = null;
        audioChunksSent = 0;

        /**
         * Stores `options` (mutating it in place to fill defaults) and
         * subscribes to the application message stream.
         *
         * @param options Provider options; the fields read by this class are
         *   model, language, the eot/eager thresholds and timeouts,
         *   safetyNetTimeoutMs, audio-processing flags, keyterms,
         *   microphoneDeviceId, connectionUrl, jwtToken, promptMetadata,
         *   `messages` (subscribe/next) and `sendMessage`.
         */
        constructor(options) {
          this.options = options;
          const o = this.options;
          o.model = o.model || "flux-general-en";
          o.language = o.language || "en";
          o.eotThreshold = o.eotThreshold ?? 0.9;
          o.eagerEotThreshold = o.eagerEotThreshold ?? 0.5;
          o.eotTimeoutMs = o.eotTimeoutMs ?? 3e3;
          o.eagerMaxTurnDurationMs = o.eagerMaxTurnDurationMs ?? 5e3;
          o.safetyNetTimeoutMs = o.safetyNetTimeoutMs ?? 2e3;
          o.echoCancellation = o.echoCancellation ?? true;
          o.noiseSuppression = o.noiseSuppression ?? true;
          o.autoGainControl = o.autoGainControl ?? true;

          // A value of 0 switches the corresponding eager feature off.
          const eagerThresholdLabel = 0 === o.eagerEotThreshold ? "disabled" : `${o.eagerEotThreshold}`;
          const eagerMaxTurnLabel = 0 === o.eagerMaxTurnDurationMs ? "disabled" : `${o.eagerMaxTurnDurationMs}ms`;
          log.A.debug(
            `${TAG} init — features: pure-flux-event-flow, eot_threshold=${o.eotThreshold}, eager_eot_threshold=${eagerThresholdLabel}, eot_timeout_ms=${o.eotTimeoutMs}, eager_max_turn_duration_ms=${eagerMaxTurnLabel}`
          );
          this.handleAppMessages();
        }

        /** Enables auto-reconnect, resets the back-off and connects. */
        async startRecognition() {
          log.A.info(`${TAG} Starting speech recognition`);
          this.shouldReconnect = true;
          this.resetReconnectionState();
          await this.connect();
        }

        /** Disables auto-reconnect, cancels any pending retry and disconnects. */
        async stopRecognition() {
          log.A.info(`${TAG} Stopping speech recognition`);
          this.shouldReconnect = false;
          this.clearReconnectTimeout();
          await this.disconnect();
        }

        /**
         * Pauses recognition: per-turn state is dropped and the audio tracks
         * are disabled, but the socket and the stream are kept.
         * @returns always `true`
         */
        async pause() {
          log.A.info(`${TAG} Pausing speech recognition`);
          this.state = State.Paused;
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.resetTurnState();
          this.pendingPromptRequest = null;
          if (this.stream) {
            this.stream.getTracks().forEach((track) => {
              track.enabled = false;
            });
            log.A.debug(`${TAG} Audio tracks disabled`);
          }
          return true;
        }

        /**
         * Resumes after `pause()`; from any other state it simply connects.
         * @returns always `true`
         */
        async resume() {
          log.A.info(`${TAG} Resuming speech recognition`);
          if (this.state === State.Paused) {
            // Fast path only when the capture pipeline actually exists. A
            // pause that landed during getUserMedia leaves a stream without a
            // worklet; re-enabling its tracks would never send any audio.
            if (this.stream && this.workletNode) {
              this.state = State.Connected;
              this.stream.getTracks().forEach((track) => {
                track.enabled = true;
              });
              log.A.debug(`${TAG} Audio tracks re-enabled`);
              return true;
            }
            if (this.connection) {
              this.state = State.Connected;
              // startMicrophone() releases any orphaned stream before re-acquiring.
              await this.startMicrophone();
              return true;
            }
            this.state = State.Disconnected;
          }
          log.A.debug(`${TAG} Initiating connection`);
          await this.connect();
          return true;
        }

        /** Replaces the metadata attached to outgoing chat prompts. */
        setChatMetadata(metadata) {
          this.options.promptMetadata = metadata;
        }

        /**
         * Fetches a token, opens the Flux v2 socket and starts the microphone.
         * Never throws: failures are logged and, when auto-reconnect is
         * enabled, reported as a transient error and retried with back-off.
         */
        async connect() {
          if (this.state === State.Connected) {
            log.A.warn(`${TAG} Already connected`);
            return;
          }
          if (this.state === State.Connecting) {
            log.A.warn(`${TAG} Connection already in progress`);
            return;
          }
          this.state = State.Connecting;
          let openTimer;
          try {
            const grant = await this.getToken();
            log.A.info(
              `${TAG} Connecting to Deepgram Flux v2 — api_url="${grant.api_url}", sdk_version="${grant.sdk_version}", token_length=${grant.token?.length ?? 0}`
            );
            const client = new deepgram.c({ accessToken: grant.token, baseUrl: grant.api_url });
            const { model, language, eotThreshold, eagerEotThreshold, eotTimeoutMs, keyterms } = this.options;
            const params = {
              model,
              encoding: "linear16",
              sample_rate: String(SAMPLE_RATE_HZ),
              mip_opt_out: "true",
              ...(void 0 !== eotThreshold && { eot_threshold: eotThreshold }),
              ...(void 0 !== eagerEotThreshold && eagerEotThreshold > 0 && { eager_eot_threshold: eagerEotThreshold }),
              ...(void 0 !== eotTimeoutMs && { eot_timeout_ms: eotTimeoutMs }),
              ...(keyterms && keyterms.length > 0 && { keyterm: keyterms }),
            };
            if ("flux-general-multi" === model && language) {
              params.language_hint = language;
            }
            this.connection = await client.listen.v2.connect(params);
            this.connection.connect();
            const openTimeout = new Promise((_resolve, reject) => {
              openTimer = setTimeout(() => reject(new Error("Connection timeout")), CONNECT_TIMEOUT_MS);
            });
            await Promise.race([this.connection.waitForOpen(), openTimeout]);

            // stopRecognition() may have run while the socket was opening;
            // do not flip back to Connected or start the microphone.
            if (this.state === State.Disconnected) {
              log.A.info(`${TAG} Recognition stopped during connection — aborting`);
              this.closeConnection();
              return;
            }
            if (this.state !== State.Paused) {
              this.state = State.Connected;
            }
            log.A.info(`${TAG} Connection opened`);
            this.setupEventHandlers();
            if (this.state === State.Paused) {
              log.A.info(`${TAG} Pause requested during connection — staying paused`);
              this.resetReconnectionState();
              return;
            }
            await this.startMicrophone();
            log.A.info(`${TAG} Connected successfully`);
            this.resetReconnectionState();
          } catch (err) {
            this.state = State.Disconnected;
            log.A.error(`${TAG} Connection error`, log.A.serialiseError(err));
            // Do not leave a half-open socket behind (e.g. after the timeout).
            this.closeConnection();
            if (this.shouldReconnect) {
              this.emitTransientError(err);
              this.scheduleReconnect();
            }
          } finally {
            clearTimeout(openTimer);
          }
        }

        /**
         * Stops the microphone, closes the socket and notifies listeners.
         * No-op when idle, or when already disconnected with no socket.
         */
        async disconnect() {
          if (this.state === State.Idle) return;
          if (this.state === State.Disconnected && !this.connection) return;

          log.A.info(`${TAG} Disconnecting`);
          try {
            this.stopMicrophone();
            if (this.connection) {
              try {
                this.connection.sendCloseStream({ type: "CloseStream" });
              } catch {
                // Best effort: the socket may already be closed.
              }
              this.connection.close();
              this.connection = null;
            }
          } catch (err) {
            log.A.error(`${TAG} Disconnect error`, log.A.serialiseError(err));
          }
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.resetTurnState();
          this.pendingPromptRequest = null;
          this.state = State.Disconnected;
          this.clientMsgSend(new msgs.WY(false));
        }

        /**
         * Schedules the next `connect()` with exponential back-off (doubling,
         * capped at 30 s); after 5 attempts it emits a terminal error instead.
         */
        scheduleReconnect() {
          if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
            log.A.error(`${TAG} Max reconnection attempts (${MAX_RECONNECT_ATTEMPTS}) reached`);
            this.clientMsgSend(
              new msgs.Cj(`Unable to connect to speech recognition service after ${MAX_RECONNECT_ATTEMPTS} attempts`)
            );
            return;
          }
          this.reconnectAttempts++;
          log.A.info(
            `${TAG} Scheduling reconnection attempt ${this.reconnectAttempts}/${MAX_RECONNECT_ATTEMPTS} in ${this.reconnectDelay}ms`
          );
          // Never leave an earlier timer running with its id overwritten.
          this.clearReconnectTimeout();
          this.reconnectTimeoutId = setTimeout(() => {
            this.reconnectTimeoutId = null;
            void this.connect();
          }, this.reconnectDelay);
          this.reconnectDelay = Math.min(2 * this.reconnectDelay, MAX_RECONNECT_DELAY_MS);
        }

        /** Resets the attempt counter and back-off delay and cancels any pending retry. */
        resetReconnectionState() {
          this.reconnectAttempts = 0;
          this.reconnectDelay = INITIAL_RECONNECT_DELAY_MS;
          this.clearReconnectTimeout();
        }

        /** Cancels the pending reconnect timer, if any. */
        clearReconnectTimeout() {
          if (this.reconnectTimeoutId) {
            clearTimeout(this.reconnectTimeoutId);
            this.reconnectTimeoutId = null;
          }
        }

        /**
         * Requests a Deepgram token from the speech-recognition service.
         * @returns the parsed JSON body (`token`, `api_url` and `sdk_version` are read by `connect()`)
         * @throws Error when the response status is not OK
         */
        async getToken() {
          const model = this.options.model || "flux-general-en";
          const url = `${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(model)}`;
          const response = await fetch(url, {
            method: "GET",
            headers: {
              Authorization: `Bearer ${this.options.jwtToken}`,
              "Content-Type": "application/json",
            },
          });
          if (!response.ok) {
            throw new Error(`Token fetch failed: ${response.status} ${response.statusText}`);
          }
          return await response.json();
        }

        /**
         * Acquires the microphone and wires stream -> worklet -> socket.
         * Never throws: failures are logged, the partial pipeline is released
         * and an error message is emitted to the client.
         */
        async startMicrophone() {
          try {
            log.A.info(`${TAG} Starting microphone`);
            this.stopMicrophone();
            if (!navigator.mediaDevices?.getUserMedia) {
              throw new Error("Microphone access is not available in this context");
            }
            this.stream = await navigator.mediaDevices.getUserMedia({
              audio: {
                deviceId: this.options.microphoneDeviceId ? { exact: this.options.microphoneDeviceId } : void 0,
                echoCancellation: this.options.echoCancellation,
                noiseSuppression: this.options.noiseSuppression,
                autoGainControl: this.options.autoGainControl,
              },
            });

            // The state may have changed while the permission prompt was open.
            if (this.state === State.Paused) {
              log.A.info(`${TAG} Paused during getUserMedia — keeping stream but disabling tracks`);
              this.stream.getTracks().forEach((track) => {
                track.enabled = false;
              });
              return;
            }
            if (this.state === State.Disconnected) {
              log.A.info(`${TAG} Recognition stopped during getUserMedia — releasing stream`);
              this.stream.getTracks().forEach((track) => {
                track.stop();
              });
              this.stream = null;
              return;
            }

            const context = new AudioContext({ sampleRate: SAMPLE_RATE_HZ });
            this.audioContext = context;
            const source = context.createMediaStreamSource(this.stream);

            const moduleUrl = URL.createObjectURL(new Blob([PCM_WORKLET_SOURCE], { type: "application/javascript" }));
            try {
              await context.audioWorklet.addModule(moduleUrl);
            } finally {
              // Revoke even when addModule rejects, otherwise the blob URL leaks.
              URL.revokeObjectURL(moduleUrl);
            }
            // stopMicrophone() ran while the module was loading: nothing left to wire up.
            if (this.audioContext !== context) return;

            this.workletNode = new AudioWorkletNode(context, "pcm-capture-processor");
            this.audioChunksSent = 0;
            this.workletNode.port.onmessage = (event) => {
              if (!this.connection || this.state !== State.Connected) return;
              this.connection.sendMedia(event.data);
              this.audioChunksSent++;
              if (this.audioChunksSent % 50 == 1) {
                log.A.debug(
                  `${TAG} Audio chunks sent: ${this.audioChunksSent}, size: ${event.data.byteLength} bytes`
                );
              }
            };
            source.connect(this.workletNode);
            this.workletNode.connect(context.destination);
            log.A.info(`${TAG} Microphone started (linear16 PCM @ 16000Hz)`);
            this.clientMsgSend(new msgs.WY(true));
          } catch (err) {
            log.A.error(`${TAG} Microphone error`, log.A.serialiseError(err));
            // Release whatever part of the pipeline was built before the failure.
            this.stopMicrophone();
            this.clientMsgSend(new msgs.co(toError(err)));
          }
        }

        /** Tears down the worklet, the audio context and the media stream. */
        stopMicrophone() {
          if (this.workletNode) {
            this.workletNode.port.close();
            this.workletNode.disconnect();
            this.workletNode = null;
          }
          if (this.audioContext) {
            this.audioContext.close().catch(() => {});
            this.audioContext = null;
          }
          if (this.stream) {
            this.stream.getTracks().forEach((track) => {
              track.stop();
            });
            this.stream = null;
          }
          log.A.info(`${TAG} Microphone stopped`);
        }

        /** Closes and forgets the current socket, ignoring close errors. */
        closeConnection() {
          if (!this.connection) return;
          try {
            this.connection.close();
          } catch {
            // Already closed or never opened.
          }
          this.connection = null;
        }

        /** Registers open/message/close/error handlers on the current socket. */
        setupEventHandlers() {
          if (!this.connection) return;

          this.connection.on("open", () => {
            this.handleConnectionOpen();
          });

          this.connection.on("message", (message) => {
            if (null === message || "object" != typeof message || !("type" in message)) return;
            switch (message.type) {
              case "TurnInfo":
                this.handleTurnInfo(message);
                break;
              case "Connected":
                log.A.info(`${TAG} v2 connection confirmed`);
                break;
              case "Error":
                this.handleFatalError(message);
                break;
              default:
                log.A.debug(`${TAG} Unhandled v2 message type: ${message.type}`);
            }
          });

          this.connection.on("close", (event) => {
            this.handleConnectionClose(event);
          });

          this.connection.on("error", (event) => {
            const details = {};
            if (event instanceof Event) {
              details.type = event.type;
              details.target = event.target?.url ?? event.target?.readyState ?? "unknown";
            }
            log.A.error(`${TAG} WebSocket error event`, event, details);
            this.emitTransientError(event);
          });
        }

        /**
         * Dispatches a `TurnInfo` message by its `event` field. The safety-net
         * timer is re-armed after every event, including ones that failed.
         */
        handleTurnInfo(info) {
          try {
            log.A.debug(
              `${TAG} TurnInfo event: ${info.event}, transcript_length=${(info.transcript || "").length}, turn_index=${info.turn_index}, eot_confidence=${info.end_of_turn_confidence ?? "n/a"}`
            );
            switch (info.event) {
              case "StartOfTurn":
                this.handleStartOfTurn(info);
                break;
              case "Update":
                this.handleUpdate(info);
                break;
              case "EagerEndOfTurn":
                this.handleEagerEndOfTurn(info);
                break;
              case "TurnResumed":
                this.handleTurnResumed(info);
                break;
              case "EndOfTurn":
                this.handleEndOfTurn(info);
                break;
              default:
                log.A.debug(`${TAG} Unknown TurnInfo event: ${info.event}`);
            }
          } catch (err) {
            log.A.error(`${TAG} Error processing TurnInfo`, log.A.serialiseError(err));
          } finally {
            this.resetSafetyNet();
          }
        }

        /** Clears per-turn bookkeeping at the start of a new turn. */
        handleStartOfTurn(info) {
          log.A.debug(`${TAG} StartOfTurn: turn_index=${info.turn_index}`);
          this.resetTurnState();
        }

        /**
         * Handles an interim transcript: raises the speaking indicators,
         * interrupts the avatar once the user has said at least three words
         * and emits a non-final transcription result.
         */
        handleUpdate(info) {
          const transcript = info.transcript || "";
          if ("" === transcript) return;

          if (!this.isUiShowingSpeaking) {
            this.isUiShowingSpeaking = true;
            this.turnStartedAt = Date.now();
            this.clientMsgSend(new msgs._4());
          }
          if (!this.isUserCurrentlySpeaking) {
            this.isUserCurrentlySpeaking = true;
            this.dataChannelMsgSend(new speakingSignal.A(speakingSignal.f.Start));
          }
          if (this.digitalHumanSpeaking) {
            const words = this.countWords(transcript);
            if (words >= MIN_BARGE_IN_WORDS) {
              log.A.info(`${TAG} User speech detected during avatar speaking (${words} words) — interrupting`);
              this.dataChannelMsgSend(new stopSpeaking.f());
              this.clientMsgSend(new msgs.tc());
              this.digitalHumanSpeaking = false;
            } else {
              log.A.debug(
                `${TAG} User speech during avatar speaking is only ${words} word(s) — holding off barge-in (potential backchannel)`
              );
            }
          }
          this.clientMsgSend(
            new msgs.Ux({
              transcript,
              final: false,
              confidence: this.calculateWordConfidence(info.words),
              language_code: this.options.language || "",
            })
          );
        }

        /**
         * Sends the prompt early on an eager end-of-turn, unless the
         * transcript is empty, is a short backchannel over the avatar, or the
         * turn has already run longer than `eagerMaxTurnDurationMs`.
         */
        handleEagerEndOfTurn(info) {
          const transcript = info.transcript || "";
          log.A.debug(
            `${TAG} EagerEndOfTurn: confidence=${info.end_of_turn_confidence}, transcript_length=${transcript.length}`
          );
          if ("" === transcript.trim()) return;

          if (this.digitalHumanSpeaking && this.countWords(transcript) < MIN_BARGE_IN_WORDS) {
            log.A.info(
              `${TAG} EagerEndOfTurn: dropping ${this.countWords(transcript)}-word backchannel while avatar speaking`
            );
            return;
          }

          const maxTurnMs = this.options.eagerMaxTurnDurationMs ?? 0;
          if (maxTurnMs > 0 && null !== this.turnStartedAt) {
            const elapsedMs = Date.now() - this.turnStartedAt;
            if (elapsedMs > maxTurnMs) {
              log.A.info(
                `${TAG} EagerEndOfTurn: suppressed — turn duration ${elapsedMs}ms exceeds eagerMaxTurnDurationMs=${maxTurnMs}; deferring to EndOfTurn`
              );
              return;
            }
          }

          log.A.info(
            `${TAG} EagerEndOfTurn: sending prompt early (${this.countWords(transcript)} words, ${transcript.length} chars)`
          );
          this.eagerPromptSentForTurn = true;
          this.sendChatPromptRaw(transcript, this.takeCancelledEagerRequestId());
        }

        /** Returns the request id of a cancelled eager prompt (if any) and forgets it. */
        takeCancelledEagerRequestId() {
          const requestId = this.cancelledEagerRequestId;
          this.cancelledEagerRequestId = void 0;
          return requestId;
        }

        /**
         * The user kept talking after an eager end-of-turn: remember the
         * in-flight request id for reuse and cancel the eager prompt.
         */
        handleTurnResumed(info) {
          log.A.debug(`${TAG} TurnResumed: turn_index=${info.turn_index}`);
          if (null !== this.pendingPromptRequest) {
            this.cancelledEagerRequestId = this.pendingPromptRequest.requestId;
          }
          if (this.eagerPromptSentForTurn) {
            log.A.info(`${TAG} TurnResumed: cancelling in-flight eager prompt via StopSpeaking`);
            this.dataChannelMsgSend(new stopSpeaking.f());
          }
          this.clearPendingPromptRequest();
          this.eagerPromptSentForTurn = false;
        }

        /**
         * Finalises a turn: emits the final transcript, sends the chat prompt
         * unless an eager prompt already covered it (or it is a short
         * backchannel over the avatar), then resets per-turn state.
         */
        handleEndOfTurn(info) {
          const transcript = info.transcript || "";
          log.A.info(
            `${TAG} EndOfTurn: transcript_length=${transcript.length}, confidence=${info.end_of_turn_confidence}`
          );
          if ("" !== transcript.trim()) {
            this.clientMsgSend(
              new msgs.Ux({
                transcript,
                final: true,
                confidence: this.calculateWordConfidence(info.words),
                language_code: this.options.language || "",
              })
            );
            if (this.digitalHumanSpeaking && this.countWords(transcript) < MIN_BARGE_IN_WORDS) {
              log.A.info(
                `${TAG} EndOfTurn: dropping ${this.countWords(transcript)}-word backchannel while avatar speaking`
              );
              this.clearPendingPromptRequest();
            } else if (this.eagerPromptSentForTurn) {
              log.A.debug(`${TAG} EndOfTurn: skipping ChatPrompt — eager already fired for this turn`);
              this.emitPendingPromptRequest();
            } else {
              this.sendChatPromptRaw(transcript, this.takeCancelledEagerRequestId());
              this.emitPendingPromptRequest();
            }
          }
          this.resetTurnState();
          this.resetSpeakingStates();
        }

        /**
         * Handles an `Error` message from Deepgram. An `INACTIVE_CLIENT`
         * termination while paused just releases the socket; anything else is
         * surfaced to the client as a terminal error.
         */
        handleFatalError(error) {
          if (this.state === State.Paused && "INACTIVE_CLIENT" === error.code) {
            log.A.info(
              `${TAG} Inactivity termination while paused (${error.code}) — releasing socket; resume will reconnect`
            );
            this.releasePausedConnection();
            return;
          }
          log.A.error(`${TAG} Fatal error from Deepgram: ${error.code} — ${error.description}`);
          this.clientMsgSend(new msgs.Cj(`Deepgram error: ${error.code} — ${error.description}`));
        }

        /** Mean of the per-word `confidence` values; 1 when there are no words. */
        calculateWordConfidence(words) {
          if (!words || 0 === words.length) return 1;
          return words.reduce((sum, word) => sum + word.confidence, 0) / words.length;
        }

        /** Number of whitespace-separated, non-empty tokens in `text`. */
        countWords(text) {
          return text.trim().split(/\s+/).filter(Boolean).length;
        }

        /** Subscribes to application messages that affect avatar-speaking and session state. */
        handleAppMessages() {
          this.options.messages.subscribe((message) => {
            switch (message.uneeqMessageType) {
              case msgs.Yg.AvatarStartedSpeaking:
                this.digitalHumanSpeaking = true;
                break;
              case msgs.Yg.PromptResult:
                if (!message.promptResult.success) this.handleSpeakingEnd();
                break;
              case msgs.Yg.AvatarAnswer:
                // An answer that is empty once markup is stripped produces no speech.
                if ("" === message.answerSpeech.replace(/<[^>]*>/g, "")) this.handleSpeakingEnd();
                break;
              case msgs.Yg.AvatarStoppedSpeaking:
                this.handleSpeakingEnd();
                break;
              case msgs.Yg.SessionEnded:
                this.shouldReconnect = false;
                void this.stopRecognition();
                break;
              case msgs.Yg.SessionReconnecting:
                this.handleSpeakingEnd();
                this.shouldReconnect = false;
                void this.stopRecognition();
                break;
              case msgs.Yg.CustomMetadataUpdated:
                this.options.promptMetadata = message.chatMetadata;
                break;
              case msgs.Yg.SessionBackendError:
                this.handleSpeakingEnd();
                break;
            }
          });
        }

        /** Marks the recogniser connected unless a pause is in effect. */
        handleConnectionOpen() {
          if (this.state !== State.Paused) {
            this.state = State.Connected;
          }
        }

        /**
         * Handles socket closure. While paused, the socket and microphone are
         * released and `resume()` reconnects; otherwise listeners are notified
         * and a reconnect is scheduled when auto-reconnect is enabled.
         */
        handleConnectionClose(event) {
          const code = event?.code ?? "unknown";
          const reason = event?.reason ?? "";
          log.A.info(`${TAG} Connection closed — code=${code}, reason="${reason}"`);

          if (this.state === State.Paused) {
            log.A.info(`${TAG} Connection closed while paused — will reconnect on resume`);
            this.connection = null;
            this.stopMicrophone();
            this.clearSafetyNet();
            this.resetSpeakingStates();
            return;
          }

          this.state = State.Disconnected;
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.resetTurnState();
          this.pendingPromptRequest = null;
          this.clientMsgSend(new msgs.WY(false));
          if (this.shouldReconnect) {
            log.A.info(`${TAG} Unexpected disconnect, attempting reconnection...`);
            this.scheduleReconnect();
          }
        }

        /** Emits a recoverable-error message carrying the error text. */
        emitTransientError(error) {
          const text = error instanceof Error ? error.message : String(error);
          this.clientMsgSend(new msgs.fP(text));
        }

        /**
         * Sends a chat prompt over the data channel and records it as the
         * pending prompt request.
         *
         * @param prompt    transcript to send; blank prompts are ignored
         * @param requestId id to reuse (from a cancelled eager prompt); a new
         *                  one is generated when omitted
         */
        sendChatPromptRaw(prompt, requestId) {
          if (!prompt || "" === prompt.trim()) return;

          // promptMetadata can be unset (e.g. setChatMetadata(undefined));
          // writing the locale onto it must not throw and drop the prompt.
          if (null == this.options.promptMetadata) {
            this.options.promptMetadata = {};
          }
          if (this.options.language) {
            this.options.promptMetadata.userSpokenLocale = this.options.language;
          }

          const id = requestId ?? (0, requestIds.g)();
          if (void 0 !== requestId) {
            log.A.info(`${TAG} sendChatPromptRaw: reusing requestId=${id} from cancelled eager prompt`);
          }

          // Snapshot the metadata so later updates do not alter the record.
          const metadata = { ...this.options.promptMetadata };
          const speechStartedAt = this.turnStartedAt ?? Date.now();
          const speakingDurationMs = Date.now() - speechStartedAt;
          this.pendingPromptRequest = {
            prompt,
            requestId: id,
            metadata,
            speakingDurationMs,
            speechStartedAt,
          };
          this.dataChannelMsgSend(new chatPrompt.D(prompt, this.options.promptMetadata, id, false));
        }

        /** Emits the recorded pending prompt request to the client, then clears it. */
        emitPendingPromptRequest() {
          const pending = this.pendingPromptRequest;
          if (null === pending) return;
          this.clientMsgSend(
            new msgs.bS({
              prompt: pending.prompt,
              requestId: pending.requestId,
              metadata: pending.metadata,
              speakingDurationMs: pending.speakingDurationMs,
              speechStartedAt: pending.speechStartedAt,
            })
          );
          log.A.debug(
            `${TAG} PromptRequest emitted — speakingDurationMs=${pending.speakingDurationMs}, requestId=${pending.requestId}`
          );
          this.pendingPromptRequest = null;
        }

        /** Drops the pending prompt request without emitting it. */
        clearPendingPromptRequest() {
          this.pendingPromptRequest = null;
        }

        /** Records that the avatar is no longer speaking. */
        handleSpeakingEnd() {
          this.digitalHumanSpeaking = false;
        }

        /** Resets the per-turn flags (eager-sent, turn start, cancelled eager id). */
        resetTurnState() {
          this.eagerPromptSentForTurn = false;
          this.turnStartedAt = null;
          this.cancelledEagerRequestId = void 0;
        }

        /**
         * Re-arms the safety net: if a speaking indicator is up and no further
         * TurnInfo arrives within `safetyNetTimeoutMs`, the indicators are reset.
         */
        resetSafetyNet() {
          this.clearSafetyNet();
          if (!this.isUiShowingSpeaking && !this.isUserCurrentlySpeaking) return;
          this.safetyNetTimeoutId = setTimeout(() => {
            this.safetyNetTimeoutId = null;
            log.A.warn(
              `${TAG} Safety net: no TurnInfo events for ${this.options.safetyNetTimeoutMs}ms while speaking — resetting`
            );
            this.resetSpeakingStates();
          }, this.options.safetyNetTimeoutMs);
        }

        /** Cancels the safety-net timer, if armed. */
        clearSafetyNet() {
          if (this.safetyNetTimeoutId) {
            clearTimeout(this.safetyNetTimeoutId);
            this.safetyNetTimeoutId = null;
          }
        }

        /** Releases the socket and microphone while staying in the Paused state. */
        releasePausedConnection() {
          this.closeConnection();
          this.stopMicrophone();
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.state = State.Paused;
        }

        /** Lowers both speaking indicators, notifying each side only if it was raised. */
        resetSpeakingStates() {
          if (this.isUserCurrentlySpeaking) {
            this.isUserCurrentlySpeaking = false;
            this.dataChannelMsgSend(new speakingSignal.A(speakingSignal.f.Stop));
          }
          if (this.isUiShowingSpeaking) {
            this.isUiShowingSpeaking = false;
            this.clientMsgSend(new msgs.im());
          }
        }

        /** Sends a message over the data channel via `options.sendMessage`. */
        dataChannelMsgSend(message) {
          this.options.sendMessage(message);
        }

        /** Publishes a message to the client via `options.messages.next`. */
        clientMsgSend(message) {
          this.options.messages.next(message);
        }
      }
    },
  },
]);
|
|
1
|
+
"use strict";
/**
 * Webpack chunk 363: registers module 363, which exports `DeepgramFluxSTT`.
 *
 * The class streams microphone audio (16 kHz linear16 PCM captured through an
 * AudioWorklet) to a Deepgram Flux "listen v2" connection and turns the
 * resulting `TurnInfo` events into client messages and data-channel messages.
 *
 * The short property names on the imported modules (`A`, `WY`, `Cj`, ...) are
 * the bundler's export keys and must not be renamed here.
 */
(Object("undefined" != typeof self ? self : this).webpackChunkUneeq =
  Object("undefined" != typeof self ? self : this).webpackChunkUneeq || []).push([
  [363],
  {
    363(__webpack_module__, __webpack_exports__, __webpack_require__) {
      __webpack_require__.d(__webpack_exports__, {
        DeepgramFluxSTT: () => DeepgramFluxSTT,
      });

      // Bundled dependencies. Roles are inferred from how they are used below.
      var logging = __webpack_require__(514); // .A: debug/info/warn/error/serialiseError
      var messages = __webpack_require__(838); // client message classes + .Yg message-type enum
      var userSpeaking = __webpack_require__(33); // .A message class, .f {Start, Stop}
      var chatPrompt = __webpack_require__(388); // .D prompt message class
      var stopSpeaking = __webpack_require__(58); // .f message class (sent to interrupt the avatar)
      var ids = __webpack_require__(1); // .g(): request id generator
      var deepgram = __webpack_require__(622); // .c: client exposing listen.v2.connect()

      const log = logging.A;
      const LOG_PREFIX = "[Deepgram Flux STT]";

      const MAX_RECONNECT_ATTEMPTS = 5;
      const INITIAL_RECONNECT_DELAY_MS = 1e3;
      const MAX_RECONNECT_DELAY_MS = 3e4;
      const CONNECT_TIMEOUT_MS = 1e4;
      const SAMPLE_RATE_HZ = 16e3;
      // Transcripts shorter than this while the avatar is speaking are treated
      // as backchannel and do not interrupt it.
      const MIN_WORDS_FOR_BARGE_IN = 3;

      /** Lifecycle states of the recogniser. */
      const ConnectionState = {
        Idle: "Idle",
        Connecting: "Connecting",
        Connected: "Connected",
        Paused: "Paused",
        Disconnected: "Disconnected",
      };

      /**
       * Source of the AudioWorklet processor. It accumulates 1280 float samples
       * (80 ms at 16 kHz), converts them to signed 16-bit PCM and posts the
       * buffer to the main thread.
       */
      const PCM_WORKLET_SOURCE = [
        "",
        "class PcmCaptureProcessor extends AudioWorkletProcessor {",
        " constructor() {",
        " super()",
        " this._buffer = new Float32Array(1280)",
        " this._offset = 0",
        " }",
        " process(inputs, outputs, parameters) {",
        " const input = inputs[0]?.[0]",
        " if (!input) return true",
        " for (let i = 0; i < input.length; i++) {",
        " this._buffer[this._offset++] = input[i]",
        " if (this._offset >= this._buffer.length) {",
        " const int16 = new Int16Array(this._buffer.length)",
        " for (let j = 0; j < this._buffer.length; j++) {",
        " const s = Math.max(-1, Math.min(1, this._buffer[j]))",
        " int16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF",
        " }",
        " this.port.postMessage(int16.buffer, [int16.buffer])",
        " this._offset = 0",
        " }",
        " }",
        " return true",
        " }",
        "}",
        "registerProcessor('pcm-capture-processor', PcmCaptureProcessor)",
        "",
      ].join("\n");

      /**
       * Builds a human-readable message for an arbitrary thrown value.
       *
       * `JSON.stringify` on an Error/DOMException yields "{}" because `name`
       * and `message` are not enumerable own properties, so errors are
       * formatted explicitly and JSON is only used for plain values.
       */
      const describeError = (err) => {
        if (err instanceof Error) {
          return err.name ? `${err.name}: ${err.message}` : err.message;
        }
        if (typeof err === "string") {
          return err;
        }
        try {
          return JSON.stringify(err) ?? String(err);
        } catch {
          return String(err);
        }
      };

      class DeepgramFluxSTT {
        options;
        connection = null;
        state = ConnectionState.Idle;
        shouldReconnect = true;
        stream = null;
        audioContext = null;
        workletNode = null;
        reconnectAttempts = 0;
        reconnectDelay = INITIAL_RECONNECT_DELAY_MS;
        reconnectTimeoutId = null;
        digitalHumanSpeaking = false;
        pendingPromptRequest = null;
        isUserCurrentlySpeaking = false;
        isUiShowingSpeaking = false;
        eagerPromptSentForTurn = false;
        cancelledEagerRequestId = undefined;
        turnStartedAt = null;
        safetyNetTimeoutId = null;
        audioChunksSent = 0;

        /**
         * @param options Recogniser configuration. The object is kept by
         *   reference and defaults are written back into it. Fields read by
         *   this class: model, language, eotThreshold, eagerEotThreshold,
         *   eotTimeoutMs, eagerMaxTurnDurationMs, safetyNetTimeoutMs,
         *   echoCancellation, noiseSuppression, autoGainControl, keyterms,
         *   microphoneDeviceId, connectionUrl, jwtToken, promptMetadata,
         *   messages (needs subscribe/next) and sendMessage.
         */
        constructor(options) {
          this.options = options;
          const opts = this.options;
          opts.model = opts.model || "flux-general-en";
          opts.language = opts.language || "en";
          opts.eotThreshold = opts.eotThreshold ?? 0.9;
          opts.eagerEotThreshold = opts.eagerEotThreshold ?? 0.5;
          opts.eotTimeoutMs = opts.eotTimeoutMs ?? 3e3;
          opts.eagerMaxTurnDurationMs = opts.eagerMaxTurnDurationMs ?? 5e3;
          opts.safetyNetTimeoutMs = opts.safetyNetTimeoutMs ?? 2e3;
          opts.echoCancellation = opts.echoCancellation ?? true;
          opts.noiseSuppression = opts.noiseSuppression ?? true;
          opts.autoGainControl = opts.autoGainControl ?? true;

          // A value of 0 switches the corresponding eager feature off.
          const eagerThresholdLabel =
            0 === opts.eagerEotThreshold ? "disabled" : `${opts.eagerEotThreshold}`;
          const eagerMaxDurationLabel =
            0 === opts.eagerMaxTurnDurationMs ? "disabled" : `${opts.eagerMaxTurnDurationMs}ms`;
          log.debug(
            `${LOG_PREFIX} init — features: pure-flux-event-flow, eot_threshold=${opts.eotThreshold}, eager_eot_threshold=${eagerThresholdLabel}, eot_timeout_ms=${opts.eotTimeoutMs}, eager_max_turn_duration_ms=${eagerMaxDurationLabel}`
          );
          this.handleAppMessages();
        }

        /** Enables auto-reconnect, resets the back-off and connects. */
        async startRecognition() {
          log.info(`${LOG_PREFIX} Starting speech recognition`);
          this.shouldReconnect = true;
          this.resetReconnectionState();
          await this.connect();
        }

        /** Disables auto-reconnect, cancels any pending retry and disconnects. */
        async stopRecognition() {
          log.info(`${LOG_PREFIX} Stopping speech recognition`);
          this.shouldReconnect = false;
          this.clearReconnectTimeout();
          await this.disconnect();
        }

        /**
         * Pauses recognition: clears turn state and disables (does not stop)
         * the microphone tracks. The connection is left open.
         * @returns always `true`.
         */
        async pause() {
          log.info(`${LOG_PREFIX} Pausing speech recognition`);
          this.state = ConnectionState.Paused;
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.resetTurnState();
          if (this.stream) {
            this.stream.getTracks().forEach((track) => {
              track.enabled = false;
            });
            log.debug(`${LOG_PREFIX} Audio tracks disabled`);
          }
          return true;
        }

        /**
         * Resumes after `pause()`, or connects if there is nothing to resume.
         * @returns always `true`.
         */
        async resume() {
          log.info(`${LOG_PREFIX} Resuming speech recognition`);
          if (this.state === ConnectionState.Paused) {
            // Only re-enable tracks when the capture pipeline actually exists.
            // If the pause landed while getUserMedia was pending,
            // startMicrophone() kept the stream but never built the worklet, so
            // re-enabling tracks alone would leave recognition silently dead.
            if (this.stream && this.workletNode) {
              this.state = ConnectionState.Connected;
              this.stream.getTracks().forEach((track) => {
                track.enabled = true;
              });
              log.debug(`${LOG_PREFIX} Audio tracks re-enabled`);
              return true;
            }
            if (this.connection) {
              this.state = ConnectionState.Connected;
              // startMicrophone() releases any half-initialised stream first.
              await this.startMicrophone();
              return true;
            }
            this.state = ConnectionState.Disconnected;
          }
          log.debug(`${LOG_PREFIX} Initiating connection`);
          await this.connect();
          return true;
        }

        /** Replaces the metadata attached to outgoing chat prompts. */
        setChatMetadata(metadata) {
          this.options.promptMetadata = metadata;
        }

        /**
         * Fetches a token, opens the Flux connection and starts the microphone.
         * Failures are logged and, while `shouldReconnect` is set, reported as a
         * transient error and retried through `scheduleReconnect()`.
         */
        async connect() {
          if (this.state === ConnectionState.Connected) {
            log.warn(`${LOG_PREFIX} Already connected`);
            return;
          }
          if (this.state === ConnectionState.Connecting) {
            log.warn(`${LOG_PREFIX} Connection already in progress`);
            return;
          }
          this.state = ConnectionState.Connecting;

          let socket = null; // the connection created by this attempt
          let handlersAttached = false;
          try {
            const token = await this.getToken();
            log.info(
              `${LOG_PREFIX} Connecting to Deepgram Flux v2 — api_url="${token.api_url}", sdk_version="${token.sdk_version}", token_length=${token.token?.length ?? 0}`
            );
            const client = new deepgram.c({ accessToken: token.token, baseUrl: token.api_url });

            const params = {
              model: this.options.model,
              encoding: "linear16",
              sample_rate: String(SAMPLE_RATE_HZ),
              mip_opt_out: "true",
            };
            if (undefined !== this.options.eotThreshold) {
              params.eot_threshold = this.options.eotThreshold;
            }
            if (undefined !== this.options.eagerEotThreshold && this.options.eagerEotThreshold > 0) {
              params.eager_eot_threshold = this.options.eagerEotThreshold;
            }
            if (undefined !== this.options.eotTimeoutMs) {
              params.eot_timeout_ms = this.options.eotTimeoutMs;
            }
            if (this.options.keyterms && this.options.keyterms.length > 0) {
              params.keyterm = this.options.keyterms;
            }
            if ("flux-general-multi" === this.options.model && this.options.language) {
              params.language_hint = this.options.language;
            }

            this.connection = await client.listen.v2.connect(params);
            socket = this.connection;
            this.connection.connect();

            let openTimer = null;
            const opened = this.connection.waitForOpen();
            const timedOut = new Promise((_resolve, reject) => {
              openTimer = setTimeout(() => reject(new Error("Connection timeout")), CONNECT_TIMEOUT_MS);
            });
            try {
              await Promise.race([opened, timedOut]);
            } finally {
              // Do not leave the timeout timer pending once the race is decided.
              clearTimeout(openTimer);
            }

            // pause() may have been called while the socket was opening.
            if (this.state !== ConnectionState.Paused) {
              this.state = ConnectionState.Connected;
            }
            log.info(`${LOG_PREFIX} Connection opened`);
            handlersAttached = true;
            this.setupEventHandlers();
            if (this.state === ConnectionState.Paused) {
              log.info(`${LOG_PREFIX} Pause requested during connection — staying paused`);
              this.resetReconnectionState();
              return;
            }
            await this.startMicrophone();
            log.info(`${LOG_PREFIX} Connected successfully`);
            this.resetReconnectionState();
          } catch (err) {
            // Release the socket this attempt created; otherwise it stays open
            // (or keeps trying to open) while the retry creates a new one.
            // Skipped once handlers are attached so that closing cannot trigger
            // a second reconnect through handleConnectionClose().
            if (socket !== null && !handlersAttached && this.connection === socket) {
              try {
                socket.close();
              } catch {}
              this.connection = null;
            }
            // NOTE(review): this also overwrites a Paused state requested while
            // connecting, so the retry will start the microphone — confirm intent.
            this.state = ConnectionState.Disconnected;
            log.error(`${LOG_PREFIX} Connection error`, log.serialiseError(err));
            if (this.shouldReconnect) {
              this.emitTransientError(err);
              this.scheduleReconnect();
            }
          }
        }

        /**
         * Stops the microphone, closes the connection, clears turn state and
         * notifies the client. No-op when idle, or when already disconnected
         * with no connection left to close.
         */
        async disconnect() {
          const nothingToClose =
            this.state === ConnectionState.Idle ||
            (this.state === ConnectionState.Disconnected && !this.connection);
          if (nothingToClose) {
            return;
          }
          log.info(`${LOG_PREFIX} Disconnecting`);
          try {
            this.stopMicrophone();
            if (this.connection) {
              try {
                // Best effort: the socket may already be unusable.
                this.connection.sendCloseStream({ type: "CloseStream" });
              } catch {}
              this.connection.close();
              this.connection = null;
            }
          } catch (err) {
            log.error(`${LOG_PREFIX} Disconnect error`, log.serialiseError(err));
          }
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.resetTurnState();
          this.state = ConnectionState.Disconnected;
          this.clientMsgSend(new messages.WY(false));
        }

        /**
         * Schedules the next `connect()` with exponential back-off (doubling,
         * capped at 30 s). After 5 attempts it gives up and reports an error.
         */
        scheduleReconnect() {
          if (this.reconnectAttempts >= MAX_RECONNECT_ATTEMPTS) {
            log.error(`${LOG_PREFIX} Max reconnection attempts (5) reached`);
            this.clientMsgSend(
              new messages.Cj("Unable to connect to speech recognition service after 5 attempts")
            );
            return;
          }
          this.reconnectAttempts++;
          log.info(
            `${LOG_PREFIX} Scheduling reconnection attempt ${this.reconnectAttempts}/5 in ${this.reconnectDelay}ms`
          );
          this.reconnectTimeoutId = setTimeout(() => {
            this.connect();
          }, this.reconnectDelay);
          this.reconnectDelay = Math.min(2 * this.reconnectDelay, MAX_RECONNECT_DELAY_MS);
        }

        /** Resets the attempt counter and back-off delay; cancels a pending retry. */
        resetReconnectionState() {
          this.reconnectAttempts = 0;
          this.reconnectDelay = INITIAL_RECONNECT_DELAY_MS;
          this.clearReconnectTimeout();
        }

        /** Cancels a pending reconnect timer, if any. */
        clearReconnectTimeout() {
          if (this.reconnectTimeoutId) {
            clearTimeout(this.reconnectTimeoutId);
            this.reconnectTimeoutId = null;
          }
        }

        /**
         * Requests a Deepgram token from the speech-recognition service.
         * @returns the parsed JSON body; `connect()` reads `token`, `api_url`
         *   and `sdk_version` from it.
         * @throws Error when the response status is not OK.
         */
        async getToken() {
          const model = this.options.model || "flux-general-en";
          const url = `${this.options.connectionUrl}/speech-recognition-service/deepgram/token?model=${encodeURIComponent(model)}`;
          const response = await fetch(url, {
            method: "GET",
            headers: {
              Authorization: `Bearer ${this.options.jwtToken}`,
              "Content-Type": "application/json",
            },
          });
          if (!response.ok) {
            throw new Error(`Token fetch failed: ${response.status} ${response.statusText}`);
          }
          return await response.json();
        }

        /**
         * Acquires the microphone and wires it through the PCM worklet to the
         * connection. Errors are logged and reported to the client rather than
         * thrown; any partially acquired resources are released.
         */
        async startMicrophone() {
          try {
            log.info(`${LOG_PREFIX} Starting microphone`);
            this.stopMicrophone();
            this.stream = await navigator.mediaDevices.getUserMedia({
              audio: {
                deviceId: this.options.microphoneDeviceId
                  ? { exact: this.options.microphoneDeviceId }
                  : undefined,
                echoCancellation: this.options.echoCancellation,
                noiseSuppression: this.options.noiseSuppression,
                autoGainControl: this.options.autoGainControl,
              },
            });
            if (this.state === ConnectionState.Paused) {
              log.info(`${LOG_PREFIX} Paused during getUserMedia — keeping stream but disabling tracks`);
              this.stream.getTracks().forEach((track) => {
                track.enabled = false;
              });
              return;
            }

            this.audioContext = new AudioContext({ sampleRate: SAMPLE_RATE_HZ });
            const micSource = this.audioContext.createMediaStreamSource(this.stream);
            const workletBlob = new Blob([PCM_WORKLET_SOURCE], { type: "application/javascript" });
            const workletUrl = URL.createObjectURL(workletBlob);
            try {
              await this.audioContext.audioWorklet.addModule(workletUrl);
            } finally {
              // Revoke even when the module fails to load.
              URL.revokeObjectURL(workletUrl);
            }

            this.workletNode = new AudioWorkletNode(this.audioContext, "pcm-capture-processor");
            this.audioChunksSent = 0;
            this.workletNode.port.onmessage = (event) => {
              // Chunks produced while paused or disconnected are dropped.
              if (this.connection && this.state === ConnectionState.Connected) {
                this.connection.sendMedia(event.data);
                this.audioChunksSent++;
                if (this.audioChunksSent % 50 == 1) {
                  log.debug(
                    `${LOG_PREFIX} Audio chunks sent: ${this.audioChunksSent}, size: ${event.data.byteLength} bytes`
                  );
                }
              }
            };
            micSource.connect(this.workletNode);
            this.workletNode.connect(this.audioContext.destination);
            log.info(`${LOG_PREFIX} Microphone started (linear16 PCM @ 16000Hz)`);
            this.clientMsgSend(new messages.WY(true));
          } catch (err) {
            log.error(`${LOG_PREFIX} Microphone error`, log.serialiseError(err));
            // Release whatever was acquired before the failure so the device is
            // not held open and resume() does not mistake a bare stream for a
            // working pipeline.
            this.stopMicrophone();
            this.clientMsgSend(new messages.co(new Error(describeError(err))));
          }
        }

        /** Tears down the worklet, audio context and media stream, if present. */
        stopMicrophone() {
          if (this.workletNode) {
            this.workletNode.port.close();
            this.workletNode.disconnect();
            this.workletNode = null;
          }
          if (this.audioContext) {
            this.audioContext.close().catch(() => {});
            this.audioContext = null;
          }
          if (this.stream) {
            this.stream.getTracks().forEach((track) => {
              track.stop();
            });
            this.stream = null;
          }
          log.info(`${LOG_PREFIX} Microphone stopped`);
        }

        /** Attaches open/message/close/error listeners to the current connection. */
        setupEventHandlers() {
          if (!this.connection) {
            return;
          }
          this.connection.on("open", () => {
            this.handleConnectionOpen();
          });
          this.connection.on("message", (message) => {
            if (null === message || "object" != typeof message || !("type" in message)) {
              return;
            }
            switch (message.type) {
              case "TurnInfo":
                this.handleTurnInfo(message);
                break;
              case "Connected":
                log.info(`${LOG_PREFIX} v2 connection confirmed`);
                break;
              case "Error":
                this.handleFatalError(message);
                break;
              default:
                log.debug(`${LOG_PREFIX} Unhandled v2 message type: ${message.type}`);
            }
          });
          this.connection.on("close", (event) => {
            this.handleConnectionClose(event);
          });
          this.connection.on("error", (event) => {
            const details = {};
            if (event instanceof Event) {
              details.type = event.type;
              details.target = event.target?.url ?? event.target?.readyState ?? "unknown";
            }
            log.error(`${LOG_PREFIX} WebSocket error event`, event, details);
            this.emitTransientError(event);
          });
        }

        /**
         * Dispatches a `TurnInfo` message by its `event` field. Handler errors
         * are logged, and the safety-net timer is re-armed in every case.
         */
        handleTurnInfo(info) {
          try {
            log.debug(
              `${LOG_PREFIX} TurnInfo event: ${info.event}, transcript_length=${(info.transcript || "").length}, turn_index=${info.turn_index}, eot_confidence=${info.end_of_turn_confidence ?? "n/a"}`
            );
            switch (info.event) {
              case "StartOfTurn":
                this.handleStartOfTurn(info);
                break;
              case "Update":
                this.handleUpdate(info);
                break;
              case "EagerEndOfTurn":
                this.handleEagerEndOfTurn(info);
                break;
              case "TurnResumed":
                this.handleTurnResumed(info);
                break;
              case "EndOfTurn":
                this.handleEndOfTurn(info);
                break;
              default:
                log.debug(`${LOG_PREFIX} Unknown TurnInfo event: ${info.event}`);
            }
          } catch (err) {
            log.error(`${LOG_PREFIX} Error processing TurnInfo`, log.serialiseError(err));
          } finally {
            this.resetSafetyNet();
          }
        }

        /** Clears the per-turn eager-prompt bookkeeping at the start of a turn. */
        handleStartOfTurn(info) {
          log.debug(`${LOG_PREFIX} StartOfTurn: turn_index=${info.turn_index}`);
          this.eagerPromptSentForTurn = false;
          this.turnStartedAt = null;
          this.cancelledEagerRequestId = undefined;
        }

        /**
         * Handles an interim transcript: marks the user as speaking, interrupts
         * the avatar once at least 3 words were spoken over it, and forwards the
         * interim transcript to the client.
         */
        handleUpdate(info) {
          const transcript = info.transcript || "";
          if ("" === transcript) {
            return;
          }
          if (!this.isUiShowingSpeaking) {
            this.isUiShowingSpeaking = true;
            this.turnStartedAt = Date.now();
            this.clientMsgSend(new messages._4());
          }
          if (!this.isUserCurrentlySpeaking) {
            this.isUserCurrentlySpeaking = true;
            this.dataChannelMsgSend(new userSpeaking.A(userSpeaking.f.Start));
          }
          if (this.digitalHumanSpeaking) {
            const wordCount = this.countWords(transcript);
            if (wordCount >= MIN_WORDS_FOR_BARGE_IN) {
              log.info(
                `${LOG_PREFIX} User speech detected during avatar speaking (${wordCount} words) — interrupting`
              );
              this.dataChannelMsgSend(new stopSpeaking.f());
              this.clientMsgSend(new messages.tc());
              this.digitalHumanSpeaking = false;
            } else {
              log.debug(
                `${LOG_PREFIX} User speech during avatar speaking is only ${wordCount} word(s) — holding off barge-in (potential backchannel)`
              );
            }
          }
          const result = {
            transcript,
            final: false,
            confidence: this.calculateWordConfidence(info.words),
            language_code: this.options.language || "",
          };
          this.clientMsgSend(new messages.Ux(result));
        }

        /**
         * Sends the prompt early on an eager end-of-turn, unless the transcript
         * is blank, is a short backchannel over the avatar, or the turn has
         * already lasted longer than `eagerMaxTurnDurationMs`.
         */
        handleEagerEndOfTurn(info) {
          const transcript = info.transcript || "";
          log.debug(
            `${LOG_PREFIX} EagerEndOfTurn: confidence=${info.end_of_turn_confidence}, transcript_length=${transcript.length}`
          );
          if ("" === transcript.trim()) {
            return;
          }
          const wordCount = this.countWords(transcript);
          if (this.digitalHumanSpeaking && wordCount < MIN_WORDS_FOR_BARGE_IN) {
            log.info(
              `${LOG_PREFIX} EagerEndOfTurn: dropping ${wordCount}-word backchannel while avatar speaking`
            );
            return;
          }
          const maxTurnMs = this.options.eagerMaxTurnDurationMs ?? 0;
          if (maxTurnMs > 0 && null !== this.turnStartedAt) {
            const elapsedMs = Date.now() - this.turnStartedAt;
            if (elapsedMs > maxTurnMs) {
              log.info(
                `${LOG_PREFIX} EagerEndOfTurn: suppressed — turn duration ${elapsedMs}ms exceeds eagerMaxTurnDurationMs=${maxTurnMs}; deferring to EndOfTurn`
              );
              return;
            }
          }
          log.info(
            `${LOG_PREFIX} EagerEndOfTurn: sending prompt early (${wordCount} words, ${transcript.length} chars)`
          );
          this.eagerPromptSentForTurn = true;
          this.sendChatPromptRaw(transcript, this.takeCancelledEagerRequestId());
        }

        /** Returns the stored cancelled request id (if any) and clears it. */
        takeCancelledEagerRequestId() {
          const requestId = this.cancelledEagerRequestId;
          this.cancelledEagerRequestId = undefined;
          return requestId;
        }

        /**
         * The user kept talking after an eager end-of-turn: remembers the
         * pending request id for reuse, cancels the in-flight eager prompt and
         * drops the pending request.
         */
        handleTurnResumed(info) {
          log.debug(`${LOG_PREFIX} TurnResumed: turn_index=${info.turn_index}`);
          if (null !== this.pendingPromptRequest) {
            this.cancelledEagerRequestId = this.pendingPromptRequest.requestId;
          }
          if (this.eagerPromptSentForTurn) {
            log.info(`${LOG_PREFIX} TurnResumed: cancelling in-flight eager prompt via StopSpeaking`);
            this.dataChannelMsgSend(new stopSpeaking.f());
          }
          this.clearPendingPromptRequest();
          this.eagerPromptSentForTurn = false;
        }

        /**
         * Final end-of-turn: forwards the final transcript, sends the prompt
         * unless an eager prompt already went out for this turn, then resets
         * per-turn and speaking state.
         */
        handleEndOfTurn(info) {
          const transcript = info.transcript || "";
          log.info(
            `${LOG_PREFIX} EndOfTurn: transcript_length=${transcript.length}, confidence=${info.end_of_turn_confidence}`
          );
          if ("" !== transcript.trim()) {
            const result = {
              transcript,
              final: true,
              confidence: this.calculateWordConfidence(info.words),
              language_code: this.options.language || "",
            };
            this.clientMsgSend(new messages.Ux(result));

            const wordCount = this.countWords(transcript);
            if (this.digitalHumanSpeaking && wordCount < MIN_WORDS_FOR_BARGE_IN) {
              log.info(
                `${LOG_PREFIX} EndOfTurn: dropping ${wordCount}-word backchannel while avatar speaking`
              );
              this.clearPendingPromptRequest();
            } else if (this.eagerPromptSentForTurn) {
              log.debug(`${LOG_PREFIX} EndOfTurn: skipping ChatPrompt — eager already fired for this turn`);
              this.emitPendingPromptRequest();
            } else {
              this.sendChatPromptRaw(transcript, this.takeCancelledEagerRequestId());
              this.emitPendingPromptRequest();
            }
          }
          this.eagerPromptSentForTurn = false;
          this.turnStartedAt = null;
          this.cancelledEagerRequestId = undefined;
          this.resetSpeakingStates();
        }

        /**
         * Handles an `Error` message from the service. An `INACTIVE_CLIENT`
         * error while paused just releases the socket; anything else is
         * reported to the client.
         */
        handleFatalError(error) {
          if (this.state === ConnectionState.Paused && "INACTIVE_CLIENT" === error.code) {
            log.info(
              `${LOG_PREFIX} Inactivity termination while paused (${error.code}) — releasing socket; resume will reconnect`
            );
            this.releasePausedConnection();
            return;
          }
          log.error(`${LOG_PREFIX} Fatal error from Deepgram: ${error.code} — ${error.description}`);
          this.clientMsgSend(
            new messages.Cj(`Deepgram error: ${error.code} — ${error.description}`)
          );
        }

        /** Mean of `words[i].confidence`; 1 when the list is missing or empty. */
        calculateWordConfidence(words) {
          if (!words || 0 === words.length) {
            return 1;
          }
          return words.reduce((sum, word) => sum + word.confidence, 0) / words.length;
        }

        /** Number of whitespace-separated words in `text`. */
        countWords(text) {
          return text.trim().split(/\s+/).filter(Boolean).length;
        }

        /** Subscribes to application messages that affect recogniser state. */
        handleAppMessages() {
          this.options.messages.subscribe((message) => {
            switch (message.uneeqMessageType) {
              case messages.Yg.AvatarStartedSpeaking:
                this.digitalHumanSpeaking = true;
                break;
              case messages.Yg.PromptResult:
                if (!message.promptResult.success) {
                  this.handleSpeakingEnd();
                }
                break;
              case messages.Yg.AvatarAnswer:
                // An answer that is empty once markup tags are stripped
                // produces no speech.
                if ("" === message.answerSpeech.replace(/<[^>]*>/g, "")) {
                  this.handleSpeakingEnd();
                }
                break;
              case messages.Yg.AvatarStoppedSpeaking:
                this.handleSpeakingEnd();
                break;
              case messages.Yg.SessionEnded:
                this.shouldReconnect = false;
                this.stopRecognition();
                break;
              case messages.Yg.SessionReconnecting:
                this.handleSpeakingEnd();
                this.shouldReconnect = false;
                this.stopRecognition();
                break;
              case messages.Yg.CustomMetadataUpdated:
                this.options.promptMetadata = message.chatMetadata;
                break;
              case messages.Yg.SessionBackendError:
                this.handleSpeakingEnd();
            }
          });
        }

        /** Marks the connection as connected unless a pause is in effect. */
        handleConnectionOpen() {
          if (this.state !== ConnectionState.Paused) {
            this.state = ConnectionState.Connected;
          }
        }

        /**
         * Handles a socket close. While paused the socket and microphone are
         * released and the state stays paused; otherwise state is reset, the
         * client is notified and a reconnect is scheduled when allowed.
         */
        handleConnectionClose(event) {
          const code = event?.code ?? "unknown";
          const reason = event?.reason ?? "";
          log.info(`${LOG_PREFIX} Connection closed — code=${code}, reason="${reason}"`);
          if (this.state === ConnectionState.Paused) {
            log.info(`${LOG_PREFIX} Connection closed while paused — will reconnect on resume`);
            this.connection = null;
            this.stopMicrophone();
            this.clearSafetyNet();
            this.resetSpeakingStates();
            return;
          }
          this.state = ConnectionState.Disconnected;
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.resetTurnState();
          this.clientMsgSend(new messages.WY(false));
          if (this.shouldReconnect) {
            log.info(`${LOG_PREFIX} Unexpected disconnect, attempting reconnection...`);
            this.scheduleReconnect();
          }
        }

        /** Reports a recoverable error to the client as a text message. */
        emitTransientError(error) {
          const text = error instanceof Error ? error.message : String(error);
          this.clientMsgSend(new messages.fP(text));
        }

        /**
         * Sends a chat prompt over the data channel and records it as the
         * pending prompt request.
         * @param prompt Transcript to send; blank prompts are ignored.
         * @param reuseRequestId Request id to reuse; a new one is generated
         *   when omitted.
         */
        sendChatPromptRaw(prompt, reuseRequestId) {
          if (!prompt || "" === prompt.trim()) {
            return;
          }
          if (this.options.language) {
            this.options.promptMetadata.userSpokenLocale = this.options.language;
          }
          const requestId = reuseRequestId ?? (0, ids.g)();
          if (undefined !== reuseRequestId) {
            log.info(
              `${LOG_PREFIX} sendChatPromptRaw: reusing requestId=${requestId} from cancelled eager prompt`
            );
          }
          // Snapshot the metadata so later updates do not alter the record.
          const metadata = { ...this.options.promptMetadata };
          const speechStartedAt = this.turnStartedAt ?? Date.now();
          const speakingDurationMs = Date.now() - speechStartedAt;
          this.pendingPromptRequest = {
            prompt,
            requestId,
            metadata,
            speakingDurationMs,
            speechStartedAt,
          };
          this.dataChannelMsgSend(
            new chatPrompt.D(prompt, this.options.promptMetadata, requestId, false)
          );
        }

        /** Publishes the pending prompt request to the client, then clears it. */
        emitPendingPromptRequest() {
          const pending = this.pendingPromptRequest;
          if (null === pending) {
            return;
          }
          this.clientMsgSend(
            new messages.bS({
              prompt: pending.prompt,
              requestId: pending.requestId,
              metadata: pending.metadata,
              speakingDurationMs: pending.speakingDurationMs,
              speechStartedAt: pending.speechStartedAt,
            })
          );
          log.debug(
            `${LOG_PREFIX} PromptRequest emitted — speakingDurationMs=${pending.speakingDurationMs}, requestId=${pending.requestId}`
          );
          this.pendingPromptRequest = null;
        }

        /** Drops the pending prompt request without publishing it. */
        clearPendingPromptRequest() {
          this.pendingPromptRequest = null;
        }

        /** Records that the avatar is no longer speaking. */
        handleSpeakingEnd() {
          this.digitalHumanSpeaking = false;
        }

        /**
         * Re-arms the safety-net timer while the user is marked as speaking;
         * if no further `TurnInfo` arrives in time, speaking state is reset.
         */
        resetSafetyNet() {
          this.clearSafetyNet();
          if (this.isUiShowingSpeaking || this.isUserCurrentlySpeaking) {
            this.safetyNetTimeoutId = setTimeout(() => {
              log.warn(
                `${LOG_PREFIX} Safety net: no TurnInfo events for ${this.options.safetyNetTimeoutMs}ms while speaking — resetting`
              );
              this.resetSpeakingStates();
            }, this.options.safetyNetTimeoutMs);
          }
        }

        /** Cancels the safety-net timer, if armed. */
        clearSafetyNet() {
          if (this.safetyNetTimeoutId) {
            clearTimeout(this.safetyNetTimeoutId);
            this.safetyNetTimeoutId = null;
          }
        }

        /** Closes the socket and microphone but keeps the Paused state. */
        releasePausedConnection() {
          if (this.connection) {
            try {
              this.connection.close();
            } catch {}
            this.connection = null;
          }
          this.stopMicrophone();
          this.clearSafetyNet();
          this.resetSpeakingStates();
          this.state = ConnectionState.Paused;
        }

        /** Clears both speaking flags, sending the matching stop messages. */
        resetSpeakingStates() {
          if (this.isUserCurrentlySpeaking) {
            this.isUserCurrentlySpeaking = false;
            this.dataChannelMsgSend(new userSpeaking.A(userSpeaking.f.Stop));
          }
          if (this.isUiShowingSpeaking) {
            this.isUiShowingSpeaking = false;
            this.clientMsgSend(new messages.im());
          }
        }

        /** Clears all per-turn prompt bookkeeping. */
        resetTurnState() {
          this.eagerPromptSentForTurn = false;
          this.turnStartedAt = null;
          this.pendingPromptRequest = null;
          this.cancelledEagerRequestId = undefined;
        }

        /** Sends a message through `options.sendMessage`. */
        dataChannelMsgSend(message) {
          this.options.sendMessage(message);
        }

        /** Publishes a message on `options.messages`. */
        clientMsgSend(message) {
          this.options.messages.next(message);
        }
      }
    },
  },
]);
|
|
2
2
|
//# sourceMappingURL=363.index.js.map
|