@livekit/agents-plugin-deepgram 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stt.cjs +6 -1
- package/dist/stt.cjs.map +1 -1
- package/dist/stt.d.ts.map +1 -1
- package/dist/stt.js +6 -1
- package/dist/stt.js.map +1 -1
- package/package.json +5 -5
- package/src/stt.ts +7 -1
package/dist/stt.cjs
CHANGED
|
@@ -257,7 +257,12 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
257
257
|
case "SpeechStarted": {
|
|
258
258
|
if (this.#speaking) return;
|
|
259
259
|
this.#speaking = true;
|
|
260
|
-
this.queue.
|
|
260
|
+
if (!this.queue.closed) {
|
|
261
|
+
try {
|
|
262
|
+
this.queue.put({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
|
|
263
|
+
} catch (e) {
|
|
264
|
+
}
|
|
265
|
+
}
|
|
261
266
|
break;
|
|
262
267
|
}
|
|
263
268
|
// see this page:
|
package/dist/stt.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBASO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,gBAChB,4BAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n if (!this.queue.closed) {\n try {\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n } catch (e) {\n // ignore\n }\n }\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBASO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,gBAChB,4BAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,sBAAI;AACF,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D,SAAS,GAAG;AAAA,kBAEZ;AAAA,gBACF;AACA;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
package/dist/stt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;IAIhB,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAa9C,OAAO,CAAC,eAAe;IALzB,KAAK,SAA2B;gBAG9B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,UAAU,EACR,eAAe,EAAE,eAAe;cAY1B,GAAG;IAyEnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;IAIhB,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAa9C,OAAO,CAAC,eAAe;IALzB,KAAK,SAA2B;gBAG9B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,UAAU,EACR,eAAe,EAAE,eAAe;cAY1B,GAAG;IAyEnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAqLvC,OAAO,CAAC,qBAAqB;CAU9B"}
|
package/dist/stt.js
CHANGED
|
@@ -241,7 +241,12 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
241
241
|
case "SpeechStarted": {
|
|
242
242
|
if (this.#speaking) return;
|
|
243
243
|
this.#speaking = true;
|
|
244
|
-
this.queue.
|
|
244
|
+
if (!this.queue.closed) {
|
|
245
|
+
try {
|
|
246
|
+
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
247
|
+
} catch (e) {
|
|
248
|
+
}
|
|
249
|
+
}
|
|
245
250
|
break;
|
|
246
251
|
}
|
|
247
252
|
// see this page:
|
package/dist/stt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,YAChB,aAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n if (!this.queue.closed) {\n try {\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n } catch (e) {\n // ignore\n }\n }\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,YAChB,aAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,sBAAI;AACF,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D,SAAS,GAAG;AAAA,kBAEZ;AAAA,gBACF;AACA;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-deepgram",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.23",
|
|
4
4
|
"description": "Deepgram plugin for LiveKit Agents for Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -30,16 +30,16 @@
|
|
|
30
30
|
"@types/ws": "^8.5.10",
|
|
31
31
|
"tsup": "^8.3.5",
|
|
32
32
|
"typescript": "^5.0.0",
|
|
33
|
-
"@livekit/agents": "1.0.
|
|
34
|
-
"@livekit/agents-plugin-silero": "1.0.
|
|
35
|
-
"@livekit/agents-plugins-test": "1.0.
|
|
33
|
+
"@livekit/agents": "1.0.23",
|
|
34
|
+
"@livekit/agents-plugin-silero": "1.0.23",
|
|
35
|
+
"@livekit/agents-plugins-test": "1.0.23"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"ws": "^8.16.0"
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
41
|
"@livekit/rtc-node": "^0.13.12",
|
|
42
|
-
"@livekit/agents": "1.0.
|
|
42
|
+
"@livekit/agents": "1.0.23"
|
|
43
43
|
},
|
|
44
44
|
"scripts": {
|
|
45
45
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/stt.ts
CHANGED
|
@@ -315,7 +315,13 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
315
315
|
// It's also possible we receive a transcript without a SpeechStarted event.
|
|
316
316
|
if (this.#speaking) return;
|
|
317
317
|
this.#speaking = true;
|
|
318
|
-
this.queue.
|
|
318
|
+
if (!this.queue.closed) {
|
|
319
|
+
try {
|
|
320
|
+
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
321
|
+
} catch (e) {
|
|
322
|
+
// ignore
|
|
323
|
+
}
|
|
324
|
+
}
|
|
319
325
|
break;
|
|
320
326
|
}
|
|
321
327
|
// see this page:
|