@livekit/agents-plugin-deepgram 1.0.37 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/stt.cjs CHANGED
@@ -55,7 +55,8 @@ class STT extends import_agents.stt.STT {
55
55
  constructor(opts = defaultSTTOptions) {
56
56
  super({
57
57
  streaming: true,
58
- interimResults: opts.interimResults ?? defaultSTTOptions.interimResults
58
+ interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,
59
+ alignedTranscript: "word"
59
60
  });
60
61
  if (opts.apiKey === void 0 && defaultSTTOptions.apiKey === void 0) {
61
62
  throw new Error(
@@ -276,7 +277,11 @@ class SpeechStream extends import_agents.stt.SpeechStream {
276
277
  const isFinal = json["is_final"];
277
278
  const isEndpoint = json["speech_final"];
278
279
  this.#requestId = requestId;
279
- const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
280
+ const alternatives = liveTranscriptionToSpeechData(
281
+ this.#opts.language,
282
+ json,
283
+ this.startTimeOffset
284
+ );
280
285
  if (alternatives[0] && alternatives[0].text) {
281
286
  if (!this.#speaking) {
282
287
  this.#speaking = true;
@@ -340,15 +345,25 @@ class SpeechStream extends import_agents.stt.SpeechStream {
340
345
  this.queue.put(usageEvent);
341
346
  }
342
347
  }
343
- const liveTranscriptionToSpeechData = (language, data) => {
348
+ const liveTranscriptionToSpeechData = (language, data, startTimeOffset = 0) => {
344
349
  const alts = data["channel"]["alternatives"];
345
- return alts.map((alt) => ({
346
- language,
347
- startTime: alt["words"].length ? alt["words"][0]["start"] : 0,
348
- endTime: alt["words"].length ? alt["words"][alt["words"].length - 1]["end"] : 0,
349
- confidence: alt["confidence"],
350
- text: alt["transcript"]
351
- }));
350
+ return alts.map((alt) => {
351
+ const wordsData = alt["words"] ?? [];
352
+ return {
353
+ language,
354
+ startTime: wordsData.length ? wordsData[0]["start"] + startTimeOffset : startTimeOffset,
355
+ endTime: wordsData.length ? wordsData[wordsData.length - 1]["end"] + startTimeOffset : startTimeOffset,
356
+ confidence: alt["confidence"],
357
+ text: alt["transcript"],
358
+ words: wordsData.map((word) => ({
359
+ text: word["word"] ?? "",
360
+ startTime: (word["start"] ?? 0) + startTimeOffset,
361
+ endTime: (word["end"] ?? 0) + startTimeOffset,
362
+ confidence: word["confidence"] ?? 0,
363
+ startTimeOffset
364
+ }))
365
+ };
366
+ });
352
367
  };
353
368
  // Annotate the CommonJS export names for ESM import in node:
354
369
  0 && (module.exports = {
package/dist/stt.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAUO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n alignedTranscript: 'word',\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(\n this.#opts.language!,\n json,\n this.startTimeOffset,\n );\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n startTimeOffset: number = 0,\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => {\n const wordsData: any[] = alt['words'] ?? [];\n\n return {\n language,\n startTime: wordsData.length ? wordsData[0]['start'] + startTimeOffset : startTimeOffset,\n endTime: wordsData.length\n ? wordsData[wordsData.length - 1]['end'] + startTimeOffset\n : startTimeOffset,\n confidence: alt['confidence'],\n text: alt['transcript'],\n words: wordsData.map((word) => ({\n text: word['word'] ?? '',\n startTime: (word['start'] ?? 0) + startTimeOffset,\n endTime: (word['end'] ?? 0) + startTimeOffset,\n confidence: word['confidence'] ?? 0.0,\n startTimeOffset,\n })),\n };\n });\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAUO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,MACzD,mBAAmB;AAAA,IACrB,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe;AAAA,kBACnB,KAAK,MAAM;AAAA,kBACX;AAAA,kBACA,KAAK;AAAA,gBACP;AAKA,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,MACA,kBAA0B,MACL;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,QAAQ;AACvB,UAAM,YAAmB,IAAI,OAAO,KAAK,CAAC;AAE1C,WAAO;AAAA,MACL;AAAA,MACA,WAAW,UAAU,SAAS,UAAU,CAAC,EAAE,OAAO,IAAI,kBAAkB;AAAA,MACxE,SAAS,UAAU,SACf,UAAU,UAAU,SAAS,CAAC,EAAE,KAAK,IAAI,kBACzC;AAAA,MACJ,YAAY,IAAI,YAAY;AAAA,MAC5B,MAAM,IAAI,YAAY;AAAA,MACtB,OAAO,UAAU,IAAI,CAAC,UAAU;AAAA,QAC9B,MAAM,KAAK,MAAM,KAAK;AAAA,QACtB,YAAY,KAAK,OAAO,KAAK,KAAK;AAAA,QAClC,UAAU,KAAK,KAAK,KAAK,KAAK;AAAA,QAC9B,YAAY,KAAK,YAAY,KAAK;AAAA,QAClC;AAAA,MACF,EAAE;AAAA,IACJ;AAAA,EACF,CAAC;AACH;","names":["stt"]}
package/dist/stt.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACpB;AAwBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;IAI7D,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAQhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE,iBAAiB;cAWvD,GAAG;IA0EnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IA0LvC,OAAO,CAAC,qBAAqB;CAU9B"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACpB;AAwBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAwCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;IAI7D,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAQhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE,iBAAiB;cAWvD,GAAG;IA0EnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IA8LvC,OAAO,CAAC,qBAAqB;CAU9B"}
package/dist/stt.js CHANGED
@@ -39,7 +39,8 @@ class STT extends stt.STT {
39
39
  constructor(opts = defaultSTTOptions) {
40
40
  super({
41
41
  streaming: true,
42
- interimResults: opts.interimResults ?? defaultSTTOptions.interimResults
42
+ interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,
43
+ alignedTranscript: "word"
43
44
  });
44
45
  if (opts.apiKey === void 0 && defaultSTTOptions.apiKey === void 0) {
45
46
  throw new Error(
@@ -260,7 +261,11 @@ class SpeechStream extends stt.SpeechStream {
260
261
  const isFinal = json["is_final"];
261
262
  const isEndpoint = json["speech_final"];
262
263
  this.#requestId = requestId;
263
- const alternatives = liveTranscriptionToSpeechData(this.#opts.language, json);
264
+ const alternatives = liveTranscriptionToSpeechData(
265
+ this.#opts.language,
266
+ json,
267
+ this.startTimeOffset
268
+ );
264
269
  if (alternatives[0] && alternatives[0].text) {
265
270
  if (!this.#speaking) {
266
271
  this.#speaking = true;
@@ -324,15 +329,25 @@ class SpeechStream extends stt.SpeechStream {
324
329
  this.queue.put(usageEvent);
325
330
  }
326
331
  }
327
- const liveTranscriptionToSpeechData = (language, data) => {
332
+ const liveTranscriptionToSpeechData = (language, data, startTimeOffset = 0) => {
328
333
  const alts = data["channel"]["alternatives"];
329
- return alts.map((alt) => ({
330
- language,
331
- startTime: alt["words"].length ? alt["words"][0]["start"] : 0,
332
- endTime: alt["words"].length ? alt["words"][alt["words"].length - 1]["end"] : 0,
333
- confidence: alt["confidence"],
334
- text: alt["transcript"]
335
- }));
334
+ return alts.map((alt) => {
335
+ const wordsData = alt["words"] ?? [];
336
+ return {
337
+ language,
338
+ startTime: wordsData.length ? wordsData[0]["start"] + startTimeOffset : startTimeOffset,
339
+ endTime: wordsData.length ? wordsData[wordsData.length - 1]["end"] + startTimeOffset : startTimeOffset,
340
+ confidence: alt["confidence"],
341
+ text: alt["transcript"],
342
+ words: wordsData.map((word) => ({
343
+ text: word["word"] ?? "",
344
+ startTime: (word["start"] ?? 0) + startTimeOffset,
345
+ endTime: (word["end"] ?? 0) + startTimeOffset,
346
+ confidence: word["confidence"] ?? 0,
347
+ startTimeOffset
348
+ }))
349
+ };
350
+ });
336
351
  };
337
352
  export {
338
353
  STT,
package/dist/stt.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,eAAe,aAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n alignedTranscript: 'word',\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(\n this.#opts.language!,\n json,\n this.startTimeOffset,\n );\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n startTimeOffset: number = 0,\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => {\n const wordsData: any[] = alt['words'] ?? [];\n\n return {\n language,\n startTime: wordsData.length ? wordsData[0]['start'] + startTimeOffset : startTimeOffset,\n endTime: wordsData.length\n ? wordsData[wordsData.length - 1]['end'] + startTimeOffset\n : startTimeOffset,\n confidence: alt['confidence'],\n text: alt['transcript'],\n words: wordsData.map((word) => ({\n text: word['word'] ?? '',\n startTime: (word['start'] ?? 0) + startTimeOffset,\n endTime: (word['end'] ?? 0) + startTimeOffset,\n confidence: word['confidence'] ?? 0.0,\n startTimeOffset,\n })),\n };\n });\n};\n"],"mappings":"AAGA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,MACzD,mBAAmB;AAAA,IACrB,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,eAAe,aAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe;AAAA,kBACnB,KAAK,MAAM;AAAA,kBACX;AAAA,kBACA,KAAK;AAAA,gBACP;AAKA,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,MACA,kBAA0B,MACL;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,QAAQ;AACvB,UAAM,YAAmB,IAAI,OAAO,KAAK,CAAC;AAE1C,WAAO;AAAA,MACL;AAAA,MACA,WAAW,UAAU,SAAS,UAAU,CAAC,EAAE,OAAO,IAAI,kBAAkB;AAAA,MACxE,SAAS,UAAU,SACf,UAAU,UAAU,SAAS,CAAC,EAAE,KAAK,IAAI,kBACzC;AAAA,MACJ,YAAY,IAAI,YAAY;AAAA,MAC5B,MAAM,IAAI,YAAY;AAAA,MACtB,OAAO,UAAU,IAAI,CAAC,UAAU;AAAA,QAC9B,MAAM,KAAK,MAAM,KAAK;AAAA,QACtB,YAAY,KAAK,OAAO,KAAK,KAAK;AAAA,QAClC,UAAU,KAAK,KAAK,KAAK,KAAK;AAAA,QAC9B,YAAY,KAAK,YAAY,KAAK;AAAA,QAClC;AAAA,MACF,EAAE;AAAA,IACJ;AAAA,EACF,CAAC;AACH;","names":["stt"]}
package/dist/stt_v2.cjs CHANGED
@@ -74,7 +74,8 @@ class STTv2 extends import_agents.stt.STT {
74
74
  constructor(opts = {}) {
75
75
  super({
76
76
  streaming: true,
77
- interimResults: true
77
+ interimResults: true,
78
+ alignedTranscript: "word"
78
79
  });
79
80
  this.#opts = { ...defaultSTTv2Options, ...opts };
80
81
  const apiKey = opts.apiKey || process.env.DEEPGRAM_API_KEY;
@@ -297,7 +298,7 @@ class SpeechStreamv2 extends import_agents.stt.SpeechStream {
297
298
  }
298
299
  }
299
300
  #sendTranscriptEvent(eventType, data) {
300
- const alts = parseTranscription(this.#opts.language || "en", data, 0);
301
+ const alts = parseTranscription(this.#opts.language || "en", data, this.startTimeOffset);
301
302
  if (alts.length > 0) {
302
303
  this.queue.put({
303
304
  type: eventType,
@@ -323,7 +324,6 @@ class SpeechStreamv2 extends import_agents.stt.SpeechStream {
323
324
  encoding: "linear16",
324
325
  mip_opt_out: String(this.#opts.mipOptOut)
325
326
  };
326
- if (this.#opts.language) params.language = this.#opts.language;
327
327
  if (this.#opts.eagerEotThreshold)
328
328
  params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();
329
329
  if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();
@@ -342,21 +342,31 @@ class SpeechStreamv2 extends import_agents.stt.SpeechStream {
342
342
  }
343
343
  function parseTranscription(language, data, startTimeOffset) {
344
344
  const transcript = data.transcript;
345
- const words = data.words || [];
346
- if (!words || words.length === 0) {
345
+ const wordsData = data.words || [];
346
+ if (!wordsData || wordsData.length === 0) {
347
347
  return [];
348
348
  }
349
349
  let confidence = 0;
350
- if (words.length > 0) {
351
- const sum = words.reduce((acc, w) => acc + (w.confidence || 0), 0);
352
- confidence = sum / words.length;
350
+ if (wordsData.length > 0) {
351
+ const sum = wordsData.reduce((acc, w) => acc + (w.confidence || 0), 0);
352
+ confidence = sum / wordsData.length;
353
353
  }
354
354
  const sd = {
355
355
  language,
356
356
  startTime: (data.audio_window_start || 0) + startTimeOffset,
357
357
  endTime: (data.audio_window_end || 0) + startTimeOffset,
358
358
  confidence,
359
- text: transcript || ""
359
+ text: transcript || "",
360
+ // Note: Deepgram V2 (Flux) API does not provide word-level timing (start/end).
361
+ // Words only contain 'word' and 'confidence' fields, so startTime/endTime will be 0.
362
+ // See: https://developers.deepgram.com/docs/flux/nova-3-migration
363
+ words: wordsData.map((word) => ({
364
+ text: word.word ?? "",
365
+ startTime: (word.start ?? 0) + startTimeOffset,
366
+ endTime: (word.end ?? 0) + startTimeOffset,
367
+ confidence: word.confidence ?? 0,
368
+ startTimeOffset
369
+ }))
360
370
  };
361
371
  return [sd];
362
372
  }
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt_v2.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Event,\n calculateAudioDurationSeconds,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport * as queryString from 'node:querystring';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { V2Models } from './models.js';\n\nconst _CLOSE_MSG = JSON.stringify({ type: 'CloseStream' });\n\n// --- Configuration ---\n\n/**\n * Configuration options for STTv2 (Deepgram Flux model).\n */\nexport interface STTv2Options {\n apiKey?: string;\n model: V2Models | string;\n sampleRate: number;\n keyterms: string[];\n endpointUrl: string;\n language?: string;\n eagerEotThreshold?: number;\n eotThreshold?: number;\n eotTimeoutMs?: number;\n mipOptOut?: boolean;\n tags?: string[];\n}\n\nconst defaultSTTv2Options: Omit<STTv2Options, 'apiKey'> = {\n model: 'flux-general-en',\n sampleRate: 16000,\n keyterms: [],\n endpointUrl: 'wss://api.deepgram.com/v2/listen',\n language: 'en',\n mipOptOut: false,\n};\n\nfunction validateTags(tags: string[]): string[] {\n for (const tag of tags) {\n if (tag.length > 128) {\n throw new Error('tag must be no more than 128 characters');\n }\n }\n return tags;\n}\n\n/**\n * Deepgram STTv2 using the Flux model for streaming speech-to-text.\n *\n * This uses Deepgram's V2 API (`/v2/listen`) which provides turn-based\n * transcription with support for preemptive generation.\n *\n * @remarks\n * Key differences from STT (V1):\n * - Uses `TurnInfo` events instead of `SpeechStarted`/`Results`\n * - Supports `eagerEotThreshold` for preemptive LLM generation\n * - Sends `PREFLIGHT_TRANSCRIPT` events when eager end-of-turn is detected\n *\n * @example\n * ```typescript\n * import { STTv2 } from '@livekit/agents-plugin-deepgram';\n *\n * const stt = new STTv2({\n * model: 'flux-general-en',\n * eagerEotThreshold: 0.5, // Enable preemptive generation\n * });\n *\n * const stream = stt.stream();\n * stream.pushFrame(audioFrame);\n *\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives?.[0]?.text);\n * }\n * }\n * ```\n */\nexport class STTv2 extends stt.STT {\n readonly label = 'deepgram.STTv2';\n #opts: STTv2Options;\n #apiKey: string;\n #logger = log();\n\n /**\n * Create a new Deepgram STTv2 instance.\n *\n * @param opts - Configuration options\n * @param opts.apiKey - Deepgram API key (defaults to `DEEPGRAM_API_KEY` env var)\n * @param opts.model - Model to use (default: `flux-general-en`)\n * @param opts.eagerEotThreshold - Threshold (0.3-0.9) for preemptive generation\n * @param opts.eotThreshold - End-of-turn detection threshold (default: 0.7)\n * @param opts.eotTimeoutMs - End-of-turn timeout in ms (default: 3000)\n * @param opts.keyterms - List of key terms to improve recognition\n * @param opts.tags - Tags for usage reporting (max 128 chars each)\n *\n * @throws Error if no API key is provided\n */\n constructor(opts: Partial<STTv2Options> = {}) {\n super({\n streaming: true,\n interimResults: true,\n });\n\n this.#opts = { ...defaultSTTv2Options, ...opts };\n\n const apiKey = opts.apiKey || process.env.DEEPGRAM_API_KEY;\n if (!apiKey) {\n throw new Error('Deepgram API key is required');\n }\n this.#apiKey = apiKey;\n\n if (this.#opts.tags) {\n this.#opts.tags = validateTags(this.#opts.tags);\n }\n }\n\n /** The model being used for transcription */\n get model(): string {\n return this.#opts.model;\n }\n\n /** The STT provider name */\n get provider(): string {\n return 'Deepgram';\n }\n\n protected async _recognize(\n _frame: AudioFrame | AudioFrame[],\n _abortSignal?: AbortSignal,\n ): Promise<stt.SpeechEvent> {\n throw new Error('V2 API does not support non-streaming recognize. Use .stream()');\n }\n\n /**\n * Create a new streaming transcription session.\n *\n * @param options - Stream options\n * @returns A SpeechStream that emits transcription events\n */\n stream(options?: { connOptions?: APIConnectOptions }): stt.SpeechStream {\n const streamOpts = { ...this.#opts, apiKey: this.#apiKey };\n return new SpeechStreamv2(this, streamOpts, options?.connOptions);\n }\n\n /**\n * Update STT options. Changes will take effect on the next stream.\n *\n * @param opts - Partial options to update\n */\n updateOptions(opts: Partial<STTv2Options>) {\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n this.#logger.debug('Updated STTv2 options');\n }\n}\n\n// --- Stream Implementation ---\n\nclass SpeechStreamv2 extends stt.SpeechStream {\n readonly label = 'deepgram.SpeechStreamv2';\n #opts: STTv2Options & { apiKey: string };\n #logger = log();\n #ws: WebSocket | null = null;\n\n #audioDurationCollector: PeriodicCollector<number>;\n #requestId = '';\n #speaking = false;\n\n // Parity: _reconnect_event - using existing Event class from @livekit/agents\n #reconnectEvent = new Event();\n\n constructor(\n sttInstance: STTv2,\n opts: STTv2Options & { apiKey: string },\n connOptions?: APIConnectOptions,\n ) {\n super(sttInstance, opts.sampleRate, connOptions);\n this.#opts = opts;\n\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.#onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n updateOptions(opts: Partial<STTv2Options>) {\n this.#logger.debug('Stream received option update', opts);\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n\n // Trigger reconnection loop\n this.#reconnectEvent.set();\n }\n\n protected async run() {\n // Outer Loop: Handles reconnections (Configuration updates)\n while (!this.closed) {\n try {\n this.#reconnectEvent.clear();\n\n const url = this.#getDeepgramUrl();\n this.#logger.debug(`Connecting to Deepgram: ${url}`);\n\n this.#ws = new WebSocket(url, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n // 1. Wait for Connection Open\n await new Promise<void>((resolve, reject) => {\n if (!this.#ws) return reject(new Error('WebSocket not initialized'));\n\n const onOpen = () => {\n this.#ws?.off('error', onError);\n resolve();\n };\n const onError = (err: Error) => {\n this.#ws?.off('open', onOpen);\n reject(err);\n };\n\n this.#ws.once('open', onOpen);\n this.#ws.once('error', onError);\n });\n\n // 2. Run Concurrent Tasks (Send & Receive)\n const sendPromise = this.#sendTask();\n const recvPromise = this.#recvTask();\n const reconnectWait = this.#reconnectEvent.wait();\n\n // 3. Race: Normal Completion vs Reconnect Signal\n const result = await Promise.race([\n Promise.all([sendPromise, recvPromise]),\n reconnectWait.then(() => 'RECONNECT'),\n ]);\n\n if (result === 'RECONNECT') {\n this.#logger.debug('Reconnecting stream due to option update...');\n // Close current socket; loop will restart and open a new one\n this.#ws.close();\n } else {\n // Normal finish (Stream ended or Error thrown)\n break;\n }\n } catch (error) {\n this.#logger.error('Deepgram stream error', { error });\n throw error; // Let Base Class handle retry logic\n } finally {\n if (this.#ws?.readyState === WebSocket.OPEN) {\n this.#ws.close();\n }\n }\n }\n this.close();\n }\n\n async #sendTask() {\n if (!this.#ws) return;\n\n // Buffer audio into 50ms chunks (Parity)\n const samples50ms = Math.floor(this.#opts.sampleRate / 20);\n const audioBstream = new AudioByteStream(this.#opts.sampleRate, 1, samples50ms);\n\n let hasEnded = false;\n\n // Manual Iterator to allow racing against Reconnect Signal\n const iterator = this.input[Symbol.asyncIterator]();\n\n while (true) {\n const nextPromise = iterator.next();\n // If reconnect signal fires, abort the wait\n const abortPromise = this.#reconnectEvent.wait().then(() => ({ abort: true }) as const);\n\n const result = await Promise.race([nextPromise, abortPromise]);\n\n // Check if we need to abort (Reconnect) or if stream is done\n if ('abort' in result || result.done) {\n if (!('abort' in result) && result.done) {\n // Normal stream end\n hasEnded = true;\n } else {\n // Reconnect triggered - break loop immediately\n break;\n }\n }\n\n // If we broke above, we don't process data. If not, 'result' is IteratorResult\n if (hasEnded && !('value' in result)) {\n // Process flush below\n } else if ('value' in result) {\n const data = result.value;\n const frames: AudioFrame[] = [];\n\n if (data === SpeechStreamv2.FLUSH_SENTINEL) {\n frames.push(...audioBstream.flush());\n hasEnded = true;\n } else {\n frames.push(...audioBstream.write((data as AudioFrame).data.buffer as ArrayBuffer));\n }\n\n for (const frame of frames) {\n this.#audioDurationCollector.push(calculateAudioDurationSeconds(frame));\n\n if (this.#ws!.readyState === WebSocket.OPEN) {\n this.#ws!.send(frame.data);\n }\n\n if (hasEnded) {\n this.#audioDurationCollector.flush();\n hasEnded = false;\n }\n }\n }\n\n if (hasEnded) break;\n }\n\n // Only send CloseStream if we are exiting normally (not reconnecting)\n if (!this.#reconnectEvent.isSet && this.#ws!.readyState === WebSocket.OPEN) {\n this.#logger.debug('Sending CloseStream message to Deepgram');\n this.#ws!.send(_CLOSE_MSG);\n }\n }\n\n async #recvTask() {\n if (!this.#ws) return;\n\n return new Promise<void>((resolve) => {\n if (!this.#ws) return resolve();\n\n this.#ws.on('message', (data: Buffer, isBinary: boolean) => {\n if (isBinary) {\n this.#logger.warn('Received unexpected binary message from Deepgram');\n return;\n }\n try {\n const msg = JSON.parse(data.toString());\n this.#processStreamEvent(msg);\n } catch (error) {\n this.#logger.error('Failed to parse Deepgram message', { error });\n }\n });\n\n this.#ws.on('close', (code, reason) => {\n this.#logger.debug(`Deepgram WebSocket closed: ${code} ${reason}`);\n resolve();\n });\n\n // Errors are caught by run() listener, resolve here to clean up task\n this.#ws.on('error', () => resolve());\n });\n }\n\n #processStreamEvent(data: Record<string, unknown>) {\n if (data.request_id) {\n this.#requestId = data.request_id as string;\n }\n\n if (data.type === 'TurnInfo') {\n const eventType = data.event;\n\n if (eventType === 'StartOfTurn') {\n if (this.#speaking) return;\n\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n requestId: this.#requestId,\n });\n\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'Update') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EagerEndOfTurn') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.PREFLIGHT_TRANSCRIPT, data);\n } else if (eventType === 'TurnResumed') {\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EndOfTurn') {\n if (!this.#speaking) return;\n\n this.#speaking = false;\n this.#sendTranscriptEvent(stt.SpeechEventType.FINAL_TRANSCRIPT, data);\n\n this.queue.put({\n type: stt.SpeechEventType.END_OF_SPEECH,\n requestId: this.#requestId,\n });\n }\n } else if (data.type === 'Error') {\n this.#logger.warn('deepgram sent an error', { data });\n const desc = (data.description as string) || 'unknown error from deepgram';\n throw new Error(`Deepgram API Error: ${desc}`);\n }\n }\n\n #sendTranscriptEvent(eventType: stt.SpeechEventType, data: Record<string, unknown>) {\n // Note: start_time_offset is not yet available in the TypeScript base class\n // Using 0.0 for now - full parity would require base class changes\n const alts = parseTranscription(this.#opts.language || 'en', data, 0.0);\n\n if (alts.length > 0) {\n this.queue.put({\n type: eventType,\n requestId: this.#requestId,\n alternatives: [alts[0]!, ...alts.slice(1)],\n });\n }\n }\n\n #onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n\n #getDeepgramUrl(): string {\n const params: Record<string, string | string[]> = {\n model: this.#opts.model,\n sample_rate: this.#opts.sampleRate.toString(),\n encoding: 'linear16',\n mip_opt_out: String(this.#opts.mipOptOut),\n };\n\n if (this.#opts.language) params.language = this.#opts.language;\n if (this.#opts.eagerEotThreshold)\n params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();\n if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();\n if (this.#opts.eotTimeoutMs) params.eot_timeout_ms = this.#opts.eotTimeoutMs.toString();\n\n if (this.#opts.keyterms.length > 0) params.keyterm = this.#opts.keyterms;\n if (this.#opts.tags && this.#opts.tags.length > 0) params.tag = this.#opts.tags;\n\n const baseUrl = this.#opts.endpointUrl.replace(/^http/, 'ws');\n const qs = queryString.stringify(params);\n return `${baseUrl}?${qs}`;\n }\n\n override close() {\n super.close();\n this.#ws?.close();\n }\n}\n\n// --- Helpers ---\n\nfunction parseTranscription(\n language: string,\n data: Record<string, unknown>,\n startTimeOffset: number,\n): stt.SpeechData[] {\n const transcript = data.transcript as string | undefined;\n const words = (data.words as Array<Record<string, unknown>>) || [];\n\n if (!words || words.length === 0) {\n return [];\n }\n\n let confidence = 0;\n if (words.length > 0) {\n const sum = words.reduce((acc: number, w) => acc + ((w.confidence as number) || 0), 0);\n confidence = sum / words.length;\n }\n\n const sd: stt.SpeechData = {\n language: language,\n startTime: ((data.audio_window_start as number) || 0) + startTimeOffset,\n endTime: ((data.audio_window_end as number) || 0) + startTimeOffset,\n confidence: confidence,\n text: transcript || '',\n };\n\n return [sd];\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,kBAA6B;AAC7B,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,aAAa,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC;AAqBzD,MAAM,sBAAoD;AAAA,EACxD,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,UAAU,CAAC;AAAA,EACX,aAAa;AAAA,EACb,UAAU;AAAA,EACV,WAAW;AACb;AAEA,SAAS,aAAa,MAA0B;AAC9C,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,SAAS,KAAK;AACpB,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC3D;AAAA,EACF;AACA,SAAO;AACT;AAiCO,MAAM,cAAc,kBAAI,IAAI;AAAA,EACxB,QAAQ;AAAA,EACjB;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBd,YAAY,OAA8B,CAAC,GAAG;AAC5C,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB;AAAA,IAClB,CAAC;AAED,SAAK,QAAQ,EAAE,GAAG,qBAAqB,GAAG,KAAK;AAE/C,UAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,8BAA8B;AAAA,IAChD;AACA,SAAK,UAAU;AAEf,QAAI,KAAK,MAAM,MAAM;AACnB,WAAK,MAAM,OAAO,aAAa,KAAK,MAAM,IAAI;AAAA,IAChD;AAAA,EACF;AAAA;AAAA,EAGA,IAAI,QAAgB;AAClB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,MAAgB,WACd,QACA,cAC0B;AAC1B,UAAM,IAAI,MAAM,gEAAgE;AAAA,EAClF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,OAAO,SAAiE;AACtE,UAAM,aAAa,EAAE,GAAG,KAAK,OAAO,QAAQ,KAAK,QAAQ;AACzD,WAAO,IAAI,eAAe,MAAM,YAAY,mCAAS,WAAW;AAAA,EAClE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6B;AACzC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AACvD,SAAK,QAAQ,MAAM,uBAAuB;AAAA,EAC5C;AACF;AAIA,MAAM,uBAAuB,kBAAI,aAAa;AAAA,EACnC,QAAQ;AAAA,EACjB;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,MAAwB;AAAA,EAExB;AAAA,EACA,aAAa;AAAA,EACb,YAAY;AAAA;AAAA,EAGZ,kBAAkB,IAAI,oBAAM;AAAA,EAE5B,YACE,aACA,MACA,aACA;AACA,UAAM,aAAa,KAAK,YAAY,WAAW;AAC/C,SAAK,QAAQ;AAEb,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,uBAAuB,QAAQ;AAAA,MAClD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,cAAc,MAA6B;AACzC,SAAK,QAAQ,MAAM,iCAAiC,IAAI;AACxD,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AAGvD,SAAK,gBAAgB,IAAI;AAAA,EAC3B;AAAA,EAEA,MAAgB,MAAM;AA5MxB;AA8MI,WAAO,CAAC,KAAK,QAAQ;AACnB,UAAI;AACF,aAAK,gBAAgB,MAAM;AAE3B,cAAM,MAAM,KAAK,gBAAgB;AACjC,aAAK,QAAQ,MAAM,2BAA2B,GAAG,EAAE;AAEnD,aAAK,MAAM,IAAI,oBAAU,KAAK;AAAA,UAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,QACzD,CAAC;AAGD,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,cAAI,CAAC,KAAK,IAAK,QAAO,OAAO,IAAI,MAAM,2BAA2B,CAAC;AAEnE,gBAAM,SAAS,MAAM;AA7N/B,gBAAAA;AA8NY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,SAAS;AACvB,oBAAQ;AAAA,UACV;AACA,gBAAM,UAAU,CAAC,QAAe;AAjO1C,gBAAAA;AAkOY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,QAAQ;AACtB,mBAAO,GAAG;AAAA,UACZ;AAEA,eAAK,IAAI,KAAK,QAAQ,MAAM;AAC5B,eAAK,IAAI,KAAK,SAAS,OAAO;AAAA,QAChC,CAAC;AAGD,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,gBAAgB,KAAK,gBAAgB,KAAK;AAGhD,cAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,UAChC,QAAQ,IAAI,CAAC,aAAa,WAAW,CAAC;AAAA,UACtC,cAAc,KAAK,MAAM,WAAW;AAAA,QACtC,CAAC;AAED,YAAI,WAAW,aAAa;AAC1B,eAAK,QAAQ,MAAM,6CAA6C;AAEhE,eAAK,IAAI,MAAM;AAAA,QACjB,OAAO;AAEL;AAAA,QACF;AAAA,MACF,SAAS,OAAO;AACd,aAAK,QAAQ,MAAM,yBAAyB,EAAE,MAAM,CAAC;AACrD,cAAM;AAAA,MACR,UAAE;AACA,cAAI,UAAK,QAAL,mBAAU,gBAAe,oBAAU,MAAM;AAC3C,eAAK,IAAI,MAAM;AAAA,QACjB;AAAA,MACF;AAAA,IACF;AACA,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAGf,UAAM,cAAc,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AACzD,UAAM,eAAe,IAAI,8BAAgB,KAAK,MAAM,YAAY,GAAG,WAAW;AAE9E,QAAI,WAAW;AAGf,UAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAElD,WAAO,MAAM;AACX,YAAM,cAAc,SAAS,KAAK;AAElC,YAAM,eAAe,KAAK,gBAAgB,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,KAAK,EAAW;AAEtF,YAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,aAAa,YAAY,CAAC;AAG7D,UAAI,WAAW,UAAU,OAAO,MAAM;AACpC,YAAI,EAAE,WAAW,WAAW,OAAO,MAAM;AAEvC,qBAAW;AAAA,QACb,OAAO;AAEL;AAAA,QACF;AAAA,MACF;AAGA,UAAI,YAAY,EAAE,WAAW,SAAS;AAAA,MAEtC,WAAW,WAAW,QAAQ;AAC5B,cAAM,OAAO,OAAO;AACpB,cAAM,SAAuB,CAAC;AAE9B,YAAI,SAAS,eAAe,gBAAgB;AAC1C,iBAAO,KAAK,GAAG,aAAa,MAAM,CAAC;AACnC,qBAAW;AAAA,QACb,OAAO;AACL,iBAAO,KAAK,GAAG,aAAa,MAAO,KAAoB,KAAK,MAAqB,CAAC;AAAA,QACpF;AAEA,mBAAW,SAAS,QAAQ;AAC1B,eAAK,wBAAwB,SAAK,6CAA8B,KAAK,CAAC;AAEtE,cAAI,KAAK,IAAK,eAAe,oBAAU,MAAM;AAC3C,iBAAK,IAAK,KAAK,MAAM,IAAI;AAAA,UAC3B;AAEA,cAAI,UAAU;AACZ,iBAAK,wBAAwB,MAAM;AACnC,uBAAW;AAAA,UACb;AAAA,QACF;AAAA,MACF;AAEA,UAAI,SAAU;AAAA,IAChB;AAGA,QAAI,CAAC,KAAK,gBAAgB,SAAS,KAAK,IAAK,eAAe,oBAAU,MAAM;AAC1E,WAAK,QAAQ,MAAM,yCAAyC;AAC5D,WAAK,IAAK,KAAK,UAAU;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAEf,WAAO,IAAI,QAAc,CAAC,YAAY;AACpC,UAAI,CAAC,KAAK,IAAK,QAAO,QAAQ;AAE9B,WAAK,IAAI,GAAG,WAAW,CAAC,MAAc,aAAsB;AAC1D,YAAI,UAAU;AACZ,eAAK,QAAQ,KAAK,kDAAkD;AACpE;AAAA,QACF;AACA,YAAI;AACF,gBAAM,MAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AACtC,eAAK,oBAAoB,GAAG;AAAA,QAC9B,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,oCAAoC,EAAE,MAAM,CAAC;AAAA,QAClE;AAAA,MACF,CAAC;AAED,WAAK,IAAI,GAAG,SAAS,CAAC,MAAM,WAAW;AACrC,aAAK,QAAQ,MAAM,8BAA8B,IAAI,IAAI,MAAM,EAAE;AACjE,gBAAQ;AAAA,MACV,CAAC;AAGD,WAAK,IAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAAA,IACtC,CAAC;AAAA,EACH;AAAA,EAEA,oBAAoB,MAA+B;AACjD,QAAI,KAAK,YAAY;AACnB,WAAK,aAAa,KAAK;AAAA,IACzB;AAEA,QAAI,KAAK,SAAS,YAAY;AAC5B,YAAM,YAAY,KAAK;AAEvB,UAAI,cAAc,eAAe;AAC/B,YAAI,KAAK,UAAW;AAEpB,aAAK,YAAY;AACjB,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,kBAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAED,aAAK,qBAAqB,kBAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,UAAU;AACjC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,kBAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,kBAAkB;AACzC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,kBAAI,gBAAgB,sBAAsB,IAAI;AAAA,MAC1E,WAAW,cAAc,eAAe;AACtC,aAAK,qBAAqB,kBAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,aAAa;AACpC,YAAI,CAAC,KAAK,UAAW;AAErB,aAAK,YAAY;AACjB,aAAK,qBAAqB,kBAAI,gBAAgB,kBAAkB,IAAI;AAEpE,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,kBAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAAA,MACH;AAAA,IACF,WAAW,KAAK,SAAS,SAAS;AAChC,WAAK,QAAQ,KAAK,0BAA0B,EAAE,KAAK,CAAC;AACpD,YAAM,OAAQ,KAAK,eAA0B;AAC7C,YAAM,IAAI,MAAM,uBAAuB,IAAI,EAAE;AAAA,IAC/C;AAAA,EACF;AAAA,EAEA,qBAAqB,WAAgC,MAA+B;AAGlF,UAAM,OAAO,mBAAmB,KAAK,MAAM,YAAY,MAAM,MAAM,CAAG;AAEtE,QAAI,KAAK,SAAS,GAAG;AACnB,WAAK,MAAM,IAAI;AAAA,QACb,MAAM;AAAA,QACN,WAAW,KAAK;AAAA,QAChB,cAAc,CAAC,KAAK,CAAC,GAAI,GAAG,KAAK,MAAM,CAAC,CAAC;AAAA,MAC3C,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,uBAAuB,UAAkB;AACvC,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AAAA,EAEA,kBAA0B;AACxB,UAAM,SAA4C;AAAA,MAChD,OAAO,KAAK,MAAM;AAAA,MAClB,aAAa,KAAK,MAAM,WAAW,SAAS;AAAA,MAC5C,UAAU;AAAA,MACV,aAAa,OAAO,KAAK,MAAM,SAAS;AAAA,IAC1C;AAEA,QAAI,KAAK,MAAM,SAAU,QAAO,WAAW,KAAK,MAAM;AACtD,QAAI,KAAK,MAAM;AACb,aAAO,sBAAsB,KAAK,MAAM,kBAAkB,SAAS;AACrE,QAAI,KAAK,MAAM,aAAc,QAAO,gBAAgB,KAAK,MAAM,aAAa,SAAS;AACrF,QAAI,KAAK,MAAM,aAAc,QAAO,iBAAiB,KAAK,MAAM,aAAa,SAAS;AAEtF,QAAI,KAAK,MAAM,SAAS,SAAS,EAAG,QAAO,UAAU,KAAK,MAAM;AAChE,QAAI,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,SAAS,EAAG,QAAO,MAAM,KAAK,MAAM;AAE3E,UAAM,UAAU,KAAK,MAAM,YAAY,QAAQ,SAAS,IAAI;AAC5D,UAAM,KAAK,YAAY,UAAU,MAAM;AACvC,WAAO,GAAG,OAAO,IAAI,EAAE;AAAA,EACzB;AAAA,EAES,QAAQ;AArcnB;AAscI,UAAM,MAAM;AACZ,eAAK,QAAL,mBAAU;AAAA,EACZ;AACF;AAIA,SAAS,mBACP,UACA,MACA,iBACkB;AAClB,QAAM,aAAa,KAAK;AACxB,QAAM,QAAS,KAAK,SAA4C,CAAC;AAEjE,MAAI,CAAC,SAAS,MAAM,WAAW,GAAG;AAChC,WAAO,CAAC;AAAA,EACV;AAEA,MAAI,aAAa;AACjB,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,MAAM,MAAM,OAAO,CAAC,KAAa,MAAM,OAAQ,EAAE,cAAyB,IAAI,CAAC;AACrF,iBAAa,MAAM,MAAM;AAAA,EAC3B;AAEA,QAAM,KAAqB;AAAA,IACzB;AAAA,IACA,YAAa,KAAK,sBAAiC,KAAK;AAAA,IACxD,UAAW,KAAK,oBAA+B,KAAK;AAAA,IACpD;AAAA,IACA,MAAM,cAAc;AAAA,EACtB;AAEA,SAAO,CAAC,EAAE;AACZ;","names":["_a"]}
1
+ {"version":3,"sources":["../src/stt_v2.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Event,\n calculateAudioDurationSeconds,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport * as queryString from 'node:querystring';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { V2Models } from './models.js';\n\nconst _CLOSE_MSG = JSON.stringify({ type: 'CloseStream' });\n\n// --- Configuration ---\n\n/**\n * Configuration options for STTv2 (Deepgram Flux model).\n */\nexport interface STTv2Options {\n apiKey?: string;\n model: V2Models | string;\n sampleRate: number;\n keyterms: string[];\n endpointUrl: string;\n language?: string;\n eagerEotThreshold?: number;\n eotThreshold?: number;\n eotTimeoutMs?: number;\n mipOptOut?: boolean;\n tags?: string[];\n}\n\nconst defaultSTTv2Options: Omit<STTv2Options, 'apiKey'> = {\n model: 'flux-general-en',\n sampleRate: 16000,\n keyterms: [],\n endpointUrl: 'wss://api.deepgram.com/v2/listen',\n language: 'en',\n mipOptOut: false,\n};\n\nfunction validateTags(tags: string[]): string[] {\n for (const tag of tags) {\n if (tag.length > 128) {\n throw new Error('tag must be no more than 128 characters');\n }\n }\n return tags;\n}\n\n/**\n * Deepgram STTv2 using the Flux model for streaming speech-to-text.\n *\n * This uses Deepgram's V2 API (`/v2/listen`) which provides turn-based\n * transcription with support for preemptive generation.\n *\n * @remarks\n * Key differences from STT (V1):\n * - Uses `TurnInfo` events instead of `SpeechStarted`/`Results`\n * - Supports `eagerEotThreshold` for preemptive LLM generation\n * - Sends `PREFLIGHT_TRANSCRIPT` events when eager end-of-turn is detected\n *\n * @example\n * ```typescript\n * import { STTv2 } from '@livekit/agents-plugin-deepgram';\n *\n * const stt = new STTv2({\n * model: 'flux-general-en',\n * eagerEotThreshold: 0.5, // Enable preemptive generation\n * });\n *\n * const stream = stt.stream();\n * stream.pushFrame(audioFrame);\n *\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives?.[0]?.text);\n * }\n * }\n * ```\n */\nexport class STTv2 extends stt.STT {\n readonly label = 'deepgram.STTv2';\n #opts: STTv2Options;\n #apiKey: string;\n #logger = log();\n\n /**\n * Create a new Deepgram STTv2 instance.\n *\n * @param opts - Configuration options\n * @param opts.apiKey - Deepgram API key (defaults to `DEEPGRAM_API_KEY` env var)\n * @param opts.model - Model to use (default: `flux-general-en`)\n * @param opts.eagerEotThreshold - Threshold (0.3-0.9) for preemptive generation\n * @param opts.eotThreshold - End-of-turn detection threshold (default: 0.7)\n * @param opts.eotTimeoutMs - End-of-turn timeout in ms (default: 3000)\n * @param opts.keyterms - List of key terms to improve recognition\n * @param opts.tags - Tags for usage reporting (max 128 chars each)\n *\n * @throws Error if no API key is provided\n */\n constructor(opts: Partial<STTv2Options> = {}) {\n super({\n streaming: true,\n interimResults: true,\n alignedTranscript: 'word',\n });\n\n this.#opts = { ...defaultSTTv2Options, ...opts };\n\n const apiKey = opts.apiKey || process.env.DEEPGRAM_API_KEY;\n if (!apiKey) {\n throw new Error('Deepgram API key is required');\n }\n this.#apiKey = apiKey;\n\n if (this.#opts.tags) {\n this.#opts.tags = validateTags(this.#opts.tags);\n }\n }\n\n /** The model being used for transcription */\n get model(): string {\n return this.#opts.model;\n }\n\n /** The STT provider name */\n get provider(): string {\n return 'Deepgram';\n }\n\n protected async _recognize(\n _frame: AudioFrame | AudioFrame[],\n _abortSignal?: AbortSignal,\n ): Promise<stt.SpeechEvent> {\n throw new Error('V2 API does not support non-streaming recognize. Use .stream()');\n }\n\n /**\n * Create a new streaming transcription session.\n *\n * @param options - Stream options\n * @returns A SpeechStream that emits transcription events\n */\n stream(options?: { connOptions?: APIConnectOptions }): stt.SpeechStream {\n const streamOpts = { ...this.#opts, apiKey: this.#apiKey };\n return new SpeechStreamv2(this, streamOpts, options?.connOptions);\n }\n\n /**\n * Update STT options. Changes will take effect on the next stream.\n *\n * @param opts - Partial options to update\n */\n updateOptions(opts: Partial<STTv2Options>) {\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n this.#logger.debug('Updated STTv2 options');\n }\n}\n\n// --- Stream Implementation ---\n\nclass SpeechStreamv2 extends stt.SpeechStream {\n readonly label = 'deepgram.SpeechStreamv2';\n #opts: STTv2Options & { apiKey: string };\n #logger = log();\n #ws: WebSocket | null = null;\n\n #audioDurationCollector: PeriodicCollector<number>;\n #requestId = '';\n #speaking = false;\n\n // Parity: _reconnect_event - using existing Event class from @livekit/agents\n #reconnectEvent = new Event();\n\n constructor(\n sttInstance: STTv2,\n opts: STTv2Options & { apiKey: string },\n connOptions?: APIConnectOptions,\n ) {\n super(sttInstance, opts.sampleRate, connOptions);\n this.#opts = opts;\n\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.#onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n updateOptions(opts: Partial<STTv2Options>) {\n this.#logger.debug('Stream received option update', opts);\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n\n // Trigger reconnection loop\n this.#reconnectEvent.set();\n }\n\n protected async run() {\n // Outer Loop: Handles reconnections (Configuration updates)\n while (!this.closed) {\n try {\n this.#reconnectEvent.clear();\n\n const url = this.#getDeepgramUrl();\n this.#logger.debug(`Connecting to Deepgram: ${url}`);\n\n this.#ws = new WebSocket(url, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n // 1. Wait for Connection Open\n await new Promise<void>((resolve, reject) => {\n if (!this.#ws) return reject(new Error('WebSocket not initialized'));\n\n const onOpen = () => {\n this.#ws?.off('error', onError);\n resolve();\n };\n const onError = (err: Error) => {\n this.#ws?.off('open', onOpen);\n reject(err);\n };\n\n this.#ws.once('open', onOpen);\n this.#ws.once('error', onError);\n });\n\n // 2. Run Concurrent Tasks (Send & Receive)\n const sendPromise = this.#sendTask();\n const recvPromise = this.#recvTask();\n const reconnectWait = this.#reconnectEvent.wait();\n\n // 3. Race: Normal Completion vs Reconnect Signal\n const result = await Promise.race([\n Promise.all([sendPromise, recvPromise]),\n reconnectWait.then(() => 'RECONNECT'),\n ]);\n\n if (result === 'RECONNECT') {\n this.#logger.debug('Reconnecting stream due to option update...');\n // Close current socket; loop will restart and open a new one\n this.#ws.close();\n } else {\n // Normal finish (Stream ended or Error thrown)\n break;\n }\n } catch (error) {\n this.#logger.error('Deepgram stream error', { error });\n throw error; // Let Base Class handle retry logic\n } finally {\n if (this.#ws?.readyState === WebSocket.OPEN) {\n this.#ws.close();\n }\n }\n }\n this.close();\n }\n\n async #sendTask() {\n if (!this.#ws) return;\n\n // Buffer audio into 50ms chunks (Parity)\n const samples50ms = Math.floor(this.#opts.sampleRate / 20);\n const audioBstream = new AudioByteStream(this.#opts.sampleRate, 1, samples50ms);\n\n let hasEnded = false;\n\n // Manual Iterator to allow racing against Reconnect Signal\n const iterator = this.input[Symbol.asyncIterator]();\n\n while (true) {\n const nextPromise = iterator.next();\n // If reconnect signal fires, abort the wait\n const abortPromise = this.#reconnectEvent.wait().then(() => ({ abort: true }) as const);\n\n const result = await Promise.race([nextPromise, abortPromise]);\n\n // Check if we need to abort (Reconnect) or if stream is done\n if ('abort' in result || result.done) {\n if (!('abort' in result) && result.done) {\n // Normal stream end\n hasEnded = true;\n } else {\n // Reconnect triggered - break loop immediately\n break;\n }\n }\n\n // If we broke above, we don't process data. If not, 'result' is IteratorResult\n if (hasEnded && !('value' in result)) {\n // Process flush below\n } else if ('value' in result) {\n const data = result.value;\n const frames: AudioFrame[] = [];\n\n if (data === SpeechStreamv2.FLUSH_SENTINEL) {\n frames.push(...audioBstream.flush());\n hasEnded = true;\n } else {\n frames.push(...audioBstream.write((data as AudioFrame).data.buffer as ArrayBuffer));\n }\n\n for (const frame of frames) {\n this.#audioDurationCollector.push(calculateAudioDurationSeconds(frame));\n\n if (this.#ws!.readyState === WebSocket.OPEN) {\n this.#ws!.send(frame.data);\n }\n\n if (hasEnded) {\n this.#audioDurationCollector.flush();\n hasEnded = false;\n }\n }\n }\n\n if (hasEnded) break;\n }\n\n // Only send CloseStream if we are exiting normally (not reconnecting)\n if (!this.#reconnectEvent.isSet && this.#ws!.readyState === WebSocket.OPEN) {\n this.#logger.debug('Sending CloseStream message to Deepgram');\n this.#ws!.send(_CLOSE_MSG);\n }\n }\n\n async #recvTask() {\n if (!this.#ws) return;\n\n return new Promise<void>((resolve) => {\n if (!this.#ws) return resolve();\n\n this.#ws.on('message', (data: Buffer, isBinary: boolean) => {\n if (isBinary) {\n this.#logger.warn('Received unexpected binary message from Deepgram');\n return;\n }\n try {\n const msg = JSON.parse(data.toString());\n this.#processStreamEvent(msg);\n } catch (error) {\n this.#logger.error('Failed to parse Deepgram message', { error });\n }\n });\n\n this.#ws.on('close', (code, reason) => {\n this.#logger.debug(`Deepgram WebSocket closed: ${code} ${reason}`);\n resolve();\n });\n\n // Errors are caught by run() listener, resolve here to clean up task\n this.#ws.on('error', () => resolve());\n });\n }\n\n #processStreamEvent(data: Record<string, unknown>) {\n if (data.request_id) {\n this.#requestId = data.request_id as string;\n }\n\n if (data.type === 'TurnInfo') {\n const eventType = data.event;\n\n if (eventType === 'StartOfTurn') {\n if (this.#speaking) return;\n\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n requestId: this.#requestId,\n });\n\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'Update') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EagerEndOfTurn') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.PREFLIGHT_TRANSCRIPT, data);\n } else if (eventType === 'TurnResumed') {\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EndOfTurn') {\n if (!this.#speaking) return;\n\n this.#speaking = false;\n this.#sendTranscriptEvent(stt.SpeechEventType.FINAL_TRANSCRIPT, data);\n\n this.queue.put({\n type: stt.SpeechEventType.END_OF_SPEECH,\n requestId: this.#requestId,\n });\n }\n } else if (data.type === 'Error') {\n this.#logger.warn('deepgram sent an error', { data });\n const desc = (data.description as string) || 'unknown error from deepgram';\n throw new Error(`Deepgram API Error: ${desc}`);\n }\n }\n\n #sendTranscriptEvent(eventType: stt.SpeechEventType, data: Record<string, unknown>) {\n const alts = parseTranscription(this.#opts.language || 'en', data, this.startTimeOffset);\n\n if (alts.length > 0) {\n this.queue.put({\n type: eventType,\n requestId: this.#requestId,\n alternatives: [alts[0]!, ...alts.slice(1)],\n });\n }\n }\n\n #onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n\n #getDeepgramUrl(): string {\n const params: Record<string, string | string[]> = {\n model: this.#opts.model,\n sample_rate: this.#opts.sampleRate.toString(),\n encoding: 'linear16',\n mip_opt_out: String(this.#opts.mipOptOut),\n };\n\n // Note: v2 API does NOT include 'language' parameter\n if (this.#opts.eagerEotThreshold)\n params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();\n if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();\n if (this.#opts.eotTimeoutMs) params.eot_timeout_ms = this.#opts.eotTimeoutMs.toString();\n\n if (this.#opts.keyterms.length > 0) params.keyterm = this.#opts.keyterms;\n if (this.#opts.tags && this.#opts.tags.length > 0) params.tag = this.#opts.tags;\n\n const baseUrl = this.#opts.endpointUrl.replace(/^http/, 'ws');\n const qs = queryString.stringify(params);\n return `${baseUrl}?${qs}`;\n }\n\n override close() {\n super.close();\n this.#ws?.close();\n }\n}\n\n// --- Helpers ---\n\nfunction parseTranscription(\n language: string,\n data: Record<string, unknown>,\n startTimeOffset: number,\n): stt.SpeechData[] {\n const transcript = data.transcript as string | undefined;\n const wordsData = (data.words as Array<Record<string, unknown>>) || [];\n\n if (!wordsData || wordsData.length === 0) {\n return [];\n }\n\n let confidence = 0;\n if (wordsData.length > 0) {\n const sum = wordsData.reduce((acc: number, w) => acc + ((w.confidence as number) || 0), 0);\n confidence = sum / wordsData.length;\n }\n\n const sd: stt.SpeechData = {\n language: language,\n startTime: ((data.audio_window_start as number) || 0) + startTimeOffset,\n endTime: ((data.audio_window_end as number) || 0) + startTimeOffset,\n confidence: confidence,\n text: transcript || '',\n // Note: Deepgram V2 (Flux) API does not provide word-level timing (start/end).\n // Words only contain 'word' and 'confidence' fields, so startTime/endTime will be 0.\n // See: https://developers.deepgram.com/docs/flux/nova-3-migration\n words: wordsData.map((word) => ({\n text: (word.word as string) ?? '',\n startTime: ((word.start as number) ?? 0) + startTimeOffset,\n endTime: ((word.end as number) ?? 0) + startTimeOffset,\n confidence: (word.confidence as number) ?? 0.0,\n startTimeOffset,\n })),\n };\n\n return [sd];\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,kBAA6B;AAC7B,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,aAAa,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC;AAqBzD,MAAM,sBAAoD;AAAA,EACxD,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,UAAU,CAAC;AAAA,EACX,aAAa;AAAA,EACb,UAAU;AAAA,EACV,WAAW;AACb;AAEA,SAAS,aAAa,MAA0B;AAC9C,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,SAAS,KAAK;AACpB,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC3D;AAAA,EACF;AACA,SAAO;AACT;AAiCO,MAAM,cAAc,kBAAI,IAAI;AAAA,EACxB,QAAQ;AAAA,EACjB;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBd,YAAY,OAA8B,CAAC,GAAG;AAC5C,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB;AAAA,MAChB,mBAAmB;AAAA,IACrB,CAAC;AAED,SAAK,QAAQ,EAAE,GAAG,qBAAqB,GAAG,KAAK;AAE/C,UAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,8BAA8B;AAAA,IAChD;AACA,SAAK,UAAU;AAEf,QAAI,KAAK,MAAM,MAAM;AACnB,WAAK,MAAM,OAAO,aAAa,KAAK,MAAM,IAAI;AAAA,IAChD;AAAA,EACF;AAAA;AAAA,EAGA,IAAI,QAAgB;AAClB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,MAAgB,WACd,QACA,cAC0B;AAC1B,UAAM,IAAI,MAAM,gEAAgE;AAAA,EAClF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,OAAO,SAAiE;AACtE,UAAM,aAAa,EAAE,GAAG,KAAK,OAAO,QAAQ,KAAK,QAAQ;AACzD,WAAO,IAAI,eAAe,MAAM,YAAY,mCAAS,WAAW;AAAA,EAClE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6B;AACzC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AACvD,SAAK,QAAQ,MAAM,uBAAuB;AAAA,EAC5C;AACF;AAIA,MAAM,uBAAuB,kBAAI,aAAa;AAAA,EACnC,QAAQ;AAAA,EACjB;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,MAAwB;AAAA,EAExB;AAAA,EACA,aAAa;AAAA,EACb,YAAY;AAAA;AAAA,EAGZ,kBAAkB,IAAI,oBAAM;AAAA,EAE5B,YACE,aACA,MACA,aACA;AACA,UAAM,aAAa,KAAK,YAAY,WAAW;AAC/C,SAAK,QAAQ;AAEb,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,uBAAuB,QAAQ;AAAA,MAClD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,cAAc,MAA6B;AACzC,SAAK,QAAQ,MAAM,iCAAiC,IAAI;AACxD,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AAGvD,SAAK,gBAAgB,IAAI;AAAA,EAC3B;AAAA,EAEA,MAAgB,MAAM;AA7MxB;AA+MI,WAAO,CAAC,KAAK,QAAQ;AACnB,UAAI;AACF,aAAK,gBAAgB,MAAM;AAE3B,cAAM,MAAM,KAAK,gBAAgB;AACjC,aAAK,QAAQ,MAAM,2BAA2B,GAAG,EAAE;AAEnD,aAAK,MAAM,IAAI,oBAAU,KAAK;AAAA,UAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,QACzD,CAAC;AAGD,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,cAAI,CAAC,KAAK,IAAK,QAAO,OAAO,IAAI,MAAM,2BAA2B,CAAC;AAEnE,gBAAM,SAAS,MAAM;AA9N/B,gBAAAA;AA+NY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,SAAS;AACvB,oBAAQ;AAAA,UACV;AACA,gBAAM,UAAU,CAAC,QAAe;AAlO1C,gBAAAA;AAmOY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,QAAQ;AACtB,mBAAO,GAAG;AAAA,UACZ;AAEA,eAAK,IAAI,KAAK,QAAQ,MAAM;AAC5B,eAAK,IAAI,KAAK,SAAS,OAAO;AAAA,QAChC,CAAC;AAGD,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,gBAAgB,KAAK,gBAAgB,KAAK;AAGhD,cAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,UAChC,QAAQ,IAAI,CAAC,aAAa,WAAW,CAAC;AAAA,UACtC,cAAc,KAAK,MAAM,WAAW;AAAA,QACtC,CAAC;AAED,YAAI,WAAW,aAAa;AAC1B,eAAK,QAAQ,MAAM,6CAA6C;AAEhE,eAAK,IAAI,MAAM;AAAA,QACjB,OAAO;AAEL;AAAA,QACF;AAAA,MACF,SAAS,OAAO;AACd,aAAK,QAAQ,MAAM,yBAAyB,EAAE,MAAM,CAAC;AACrD,cAAM;AAAA,MACR,UAAE;AACA,cAAI,UAAK,QAAL,mBAAU,gBAAe,oBAAU,MAAM;AAC3C,eAAK,IAAI,MAAM;AAAA,QACjB;AAAA,MACF;AAAA,IACF;AACA,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAGf,UAAM,cAAc,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AACzD,UAAM,eAAe,IAAI,8BAAgB,KAAK,MAAM,YAAY,GAAG,WAAW;AAE9E,QAAI,WAAW;AAGf,UAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAElD,WAAO,MAAM;AACX,YAAM,cAAc,SAAS,KAAK;AAElC,YAAM,eAAe,KAAK,gBAAgB,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,KAAK,EAAW;AAEtF,YAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,aAAa,YAAY,CAAC;AAG7D,UAAI,WAAW,UAAU,OAAO,MAAM;AACpC,YAAI,EAAE,WAAW,WAAW,OAAO,MAAM;AAEvC,qBAAW;AAAA,QACb,OAAO;AAEL;AAAA,QACF;AAAA,MACF;AAGA,UAAI,YAAY,EAAE,WAAW,SAAS;AAAA,MAEtC,WAAW,WAAW,QAAQ;AAC5B,cAAM,OAAO,OAAO;AACpB,cAAM,SAAuB,CAAC;AAE9B,YAAI,SAAS,eAAe,gBAAgB;AAC1C,iBAAO,KAAK,GAAG,aAAa,MAAM,CAAC;AACnC,qBAAW;AAAA,QACb,OAAO;AACL,iBAAO,KAAK,GAAG,aAAa,MAAO,KAAoB,KAAK,MAAqB,CAAC;AAAA,QACpF;AAEA,mBAAW,SAAS,QAAQ;AAC1B,eAAK,wBAAwB,SAAK,6CAA8B,KAAK,CAAC;AAEtE,cAAI,KAAK,IAAK,eAAe,oBAAU,MAAM;AAC3C,iBAAK,IAAK,KAAK,MAAM,IAAI;AAAA,UAC3B;AAEA,cAAI,UAAU;AACZ,iBAAK,wBAAwB,MAAM;AACnC,uBAAW;AAAA,UACb;AAAA,QACF;AAAA,MACF;AAEA,UAAI,SAAU;AAAA,IAChB;AAGA,QAAI,CAAC,KAAK,gBAAgB,SAAS,KAAK,IAAK,eAAe,oBAAU,MAAM;AAC1E,WAAK,QAAQ,MAAM,yCAAyC;AAC5D,WAAK,IAAK,KAAK,UAAU;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAEf,WAAO,IAAI,QAAc,CAAC,YAAY;AACpC,UAAI,CAAC,KAAK,IAAK,QAAO,QAAQ;AAE9B,WAAK,IAAI,GAAG,WAAW,CAAC,MAAc,aAAsB;AAC1D,YAAI,UAAU;AACZ,eAAK,QAAQ,KAAK,kDAAkD;AACpE;AAAA,QACF;AACA,YAAI;AACF,gBAAM,MAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AACtC,eAAK,oBAAoB,GAAG;AAAA,QAC9B,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,oCAAoC,EAAE,MAAM,CAAC;AAAA,QAClE;AAAA,MACF,CAAC;AAED,WAAK,IAAI,GAAG,SAAS,CAAC,MAAM,WAAW;AACrC,aAAK,QAAQ,MAAM,8BAA8B,IAAI,IAAI,MAAM,EAAE;AACjE,gBAAQ;AAAA,MACV,CAAC;AAGD,WAAK,IAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAAA,IACtC,CAAC;AAAA,EACH;AAAA,EAEA,oBAAoB,MAA+B;AACjD,QAAI,KAAK,YAAY;AACnB,WAAK,aAAa,KAAK;AAAA,IACzB;AAEA,QAAI,KAAK,SAAS,YAAY;AAC5B,YAAM,YAAY,KAAK;AAEvB,UAAI,cAAc,eAAe;AAC/B,YAAI,KAAK,UAAW;AAEpB,aAAK,YAAY;AACjB,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,kBAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAED,aAAK,qBAAqB,kBAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,UAAU;AACjC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,kBAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,kBAAkB;AACzC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,kBAAI,gBAAgB,sBAAsB,IAAI;AAAA,MAC1E,WAAW,cAAc,eAAe;AACtC,aAAK,qBAAqB,kBAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,aAAa;AACpC,YAAI,CAAC,KAAK,UAAW;AAErB,aAAK,YAAY;AACjB,aAAK,qBAAqB,kBAAI,gBAAgB,kBAAkB,IAAI;AAEpE,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,kBAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAAA,MACH;AAAA,IACF,WAAW,KAAK,SAAS,SAAS;AAChC,WAAK,QAAQ,KAAK,0BAA0B,EAAE,KAAK,CAAC;AACpD,YAAM,OAAQ,KAAK,eAA0B;AAC7C,YAAM,IAAI,MAAM,uBAAuB,IAAI,EAAE;AAAA,IAC/C;AAAA,EACF;AAAA,EAEA,qBAAqB,WAAgC,MAA+B;AAClF,UAAM,OAAO,mBAAmB,KAAK,MAAM,YAAY,MAAM,MAAM,KAAK,eAAe;AAEvF,QAAI,KAAK,SAAS,GAAG;AACnB,WAAK,MAAM,IAAI;AAAA,QACb,MAAM;AAAA,QACN,WAAW,KAAK;AAAA,QAChB,cAAc,CAAC,KAAK,CAAC,GAAI,GAAG,KAAK,MAAM,CAAC,CAAC;AAAA,MAC3C,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,uBAAuB,UAAkB;AACvC,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AAAA,EAEA,kBAA0B;AACxB,UAAM,SAA4C;AAAA,MAChD,OAAO,KAAK,MAAM;AAAA,MAClB,aAAa,KAAK,MAAM,WAAW,SAAS;AAAA,MAC5C,UAAU;AAAA,MACV,aAAa,OAAO,KAAK,MAAM,SAAS;AAAA,IAC1C;AAGA,QAAI,KAAK,MAAM;AACb,aAAO,sBAAsB,KAAK,MAAM,kBAAkB,SAAS;AACrE,QAAI,KAAK,MAAM,aAAc,QAAO,gBAAgB,KAAK,MAAM,aAAa,SAAS;AACrF,QAAI,KAAK,MAAM,aAAc,QAAO,iBAAiB,KAAK,MAAM,aAAa,SAAS;AAEtF,QAAI,KAAK,MAAM,SAAS,SAAS,EAAG,QAAO,UAAU,KAAK,MAAM;AAChE,QAAI,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,SAAS,EAAG,QAAO,MAAM,KAAK,MAAM;AAE3E,UAAM,UAAU,KAAK,MAAM,YAAY,QAAQ,SAAS,IAAI;AAC5D,UAAM,KAAK,YAAY,UAAU,MAAM;AACvC,WAAO,GAAG,OAAO,IAAI,EAAE;AAAA,EACzB;AAAA,EAES,QAAQ;AApcnB;AAqcI,UAAM,MAAM;AACZ,eAAK,QAAL,mBAAU;AAAA,EACZ;AACF;AAIA,SAAS,mBACP,UACA,MACA,iBACkB;AAClB,QAAM,aAAa,KAAK;AACxB,QAAM,YAAa,KAAK,SAA4C,CAAC;AAErE,MAAI,CAAC,aAAa,UAAU,WAAW,GAAG;AACxC,WAAO,CAAC;AAAA,EACV;AAEA,MAAI,aAAa;AACjB,MAAI,UAAU,SAAS,GAAG;AACxB,UAAM,MAAM,UAAU,OAAO,CAAC,KAAa,MAAM,OAAQ,EAAE,cAAyB,IAAI,CAAC;AACzF,iBAAa,MAAM,UAAU;AAAA,EAC/B;AAEA,QAAM,KAAqB;AAAA,IACzB;AAAA,IACA,YAAa,KAAK,sBAAiC,KAAK;AAAA,IACxD,UAAW,KAAK,oBAA+B,KAAK;AAAA,IACpD;AAAA,IACA,MAAM,cAAc;AAAA;AAAA;AAAA;AAAA,IAIpB,OAAO,UAAU,IAAI,CAAC,UAAU;AAAA,MAC9B,MAAO,KAAK,QAAmB;AAAA,MAC/B,YAAa,KAAK,SAAoB,KAAK;AAAA,MAC3C,UAAW,KAAK,OAAkB,KAAK;AAAA,MACvC,YAAa,KAAK,cAAyB;AAAA,MAC3C;AAAA,IACF,EAAE;AAAA,EACJ;AAEA,SAAO,CAAC,EAAE;AACZ;","names":["_a"]}
@@ -1 +1 @@
1
- {"version":3,"file":"stt_v2.d.ts","sourceRoot":"","sources":["../src/stt_v2.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAKtB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAIpD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAM5C;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,QAAQ,GAAG,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAoBD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,qBAAa,KAAM,SAAQ,GAAG,CAAC,GAAG;;IAChC,QAAQ,CAAC,KAAK,oBAAoB;IAKlC;;;;;;;;;;;;;OAaG;gBACS,IAAI,GAAE,OAAO,CAAC,YAAY,CAAM;IAmB5C,6CAA6C;IAC7C,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,4BAA4B;IAC5B,IAAI,QAAQ,IAAI,MAAM,CAErB;cAEe,UAAU,CACxB,MAAM,EAAE,UAAU,GAAG,UAAU,EAAE,EACjC,YAAY,CAAC,EAAE,WAAW,GACzB,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI3B;;;;;OAKG;IACH,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,GAAG,CAAC,YAAY;IAKvE;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,YAAY,CAAC;CAK1C"}
1
+ {"version":3,"file":"stt_v2.d.ts","sourceRoot":"","sources":["../src/stt_v2.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAKtB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AACzB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAIpD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAM5C;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,QAAQ,GAAG,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAoBD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,qBAAa,KAAM,SAAQ,GAAG,CAAC,GAAG;;IAChC,QAAQ,CAAC,KAAK,oBAAoB;IAKlC;;;;;;;;;;;;;OAaG;gBACS,IAAI,GAAE,OAAO,CAAC,YAAY,CAAM;IAoB5C,6CAA6C;IAC7C,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,4BAA4B;IAC5B,IAAI,QAAQ,IAAI,MAAM,CAErB;cAEe,UAAU,CACxB,MAAM,EAAE,UAAU,GAAG,UAAU,EAAE,EACjC,YAAY,CAAC,EAAE,WAAW,GACzB,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI3B;;;;;OAKG;IACH,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,GAAG,CAAC,YAAY;IAKvE;;;;OAIG;IACH,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,YAAY,CAAC;CAK1C"}
package/dist/stt_v2.js CHANGED
@@ -47,7 +47,8 @@ class STTv2 extends stt.STT {
47
47
  constructor(opts = {}) {
48
48
  super({
49
49
  streaming: true,
50
- interimResults: true
50
+ interimResults: true,
51
+ alignedTranscript: "word"
51
52
  });
52
53
  this.#opts = { ...defaultSTTv2Options, ...opts };
53
54
  const apiKey = opts.apiKey || process.env.DEEPGRAM_API_KEY;
@@ -270,7 +271,7 @@ class SpeechStreamv2 extends stt.SpeechStream {
270
271
  }
271
272
  }
272
273
  #sendTranscriptEvent(eventType, data) {
273
- const alts = parseTranscription(this.#opts.language || "en", data, 0);
274
+ const alts = parseTranscription(this.#opts.language || "en", data, this.startTimeOffset);
274
275
  if (alts.length > 0) {
275
276
  this.queue.put({
276
277
  type: eventType,
@@ -296,7 +297,6 @@ class SpeechStreamv2 extends stt.SpeechStream {
296
297
  encoding: "linear16",
297
298
  mip_opt_out: String(this.#opts.mipOptOut)
298
299
  };
299
- if (this.#opts.language) params.language = this.#opts.language;
300
300
  if (this.#opts.eagerEotThreshold)
301
301
  params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();
302
302
  if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();
@@ -315,21 +315,31 @@ class SpeechStreamv2 extends stt.SpeechStream {
315
315
  }
316
316
  function parseTranscription(language, data, startTimeOffset) {
317
317
  const transcript = data.transcript;
318
- const words = data.words || [];
319
- if (!words || words.length === 0) {
318
+ const wordsData = data.words || [];
319
+ if (!wordsData || wordsData.length === 0) {
320
320
  return [];
321
321
  }
322
322
  let confidence = 0;
323
- if (words.length > 0) {
324
- const sum = words.reduce((acc, w) => acc + (w.confidence || 0), 0);
325
- confidence = sum / words.length;
323
+ if (wordsData.length > 0) {
324
+ const sum = wordsData.reduce((acc, w) => acc + (w.confidence || 0), 0);
325
+ confidence = sum / wordsData.length;
326
326
  }
327
327
  const sd = {
328
328
  language,
329
329
  startTime: (data.audio_window_start || 0) + startTimeOffset,
330
330
  endTime: (data.audio_window_end || 0) + startTimeOffset,
331
331
  confidence,
332
- text: transcript || ""
332
+ text: transcript || "",
333
+ // Note: Deepgram V2 (Flux) API does not provide word-level timing (start/end).
334
+ // Words only contain 'word' and 'confidence' fields, so startTime/endTime will be 0.
335
+ // See: https://developers.deepgram.com/docs/flux/nova-3-migration
336
+ words: wordsData.map((word) => ({
337
+ text: word.word ?? "",
338
+ startTime: (word.start ?? 0) + startTimeOffset,
339
+ endTime: (word.end ?? 0) + startTimeOffset,
340
+ confidence: word.confidence ?? 0,
341
+ startTimeOffset
342
+ }))
333
343
  };
334
344
  return [sd];
335
345
  }
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt_v2.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Event,\n calculateAudioDurationSeconds,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport * as queryString from 'node:querystring';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { V2Models } from './models.js';\n\nconst _CLOSE_MSG = JSON.stringify({ type: 'CloseStream' });\n\n// --- Configuration ---\n\n/**\n * Configuration options for STTv2 (Deepgram Flux model).\n */\nexport interface STTv2Options {\n apiKey?: string;\n model: V2Models | string;\n sampleRate: number;\n keyterms: string[];\n endpointUrl: string;\n language?: string;\n eagerEotThreshold?: number;\n eotThreshold?: number;\n eotTimeoutMs?: number;\n mipOptOut?: boolean;\n tags?: string[];\n}\n\nconst defaultSTTv2Options: Omit<STTv2Options, 'apiKey'> = {\n model: 'flux-general-en',\n sampleRate: 16000,\n keyterms: [],\n endpointUrl: 'wss://api.deepgram.com/v2/listen',\n language: 'en',\n mipOptOut: false,\n};\n\nfunction validateTags(tags: string[]): string[] {\n for (const tag of tags) {\n if (tag.length > 128) {\n throw new Error('tag must be no more than 128 characters');\n }\n }\n return tags;\n}\n\n/**\n * Deepgram STTv2 using the Flux model for streaming speech-to-text.\n *\n * This uses Deepgram's V2 API (`/v2/listen`) which provides turn-based\n * transcription with support for preemptive generation.\n *\n * @remarks\n * Key differences from STT (V1):\n * - Uses `TurnInfo` events instead of `SpeechStarted`/`Results`\n * - Supports `eagerEotThreshold` for preemptive LLM generation\n * - Sends `PREFLIGHT_TRANSCRIPT` events when eager end-of-turn is detected\n *\n * @example\n * ```typescript\n * import { STTv2 } from '@livekit/agents-plugin-deepgram';\n *\n * const stt = new STTv2({\n * model: 'flux-general-en',\n * eagerEotThreshold: 0.5, // Enable preemptive generation\n * });\n *\n * const stream = stt.stream();\n * stream.pushFrame(audioFrame);\n *\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives?.[0]?.text);\n * }\n * }\n * ```\n */\nexport class STTv2 extends stt.STT {\n readonly label = 'deepgram.STTv2';\n #opts: STTv2Options;\n #apiKey: string;\n #logger = log();\n\n /**\n * Create a new Deepgram STTv2 instance.\n *\n * @param opts - Configuration options\n * @param opts.apiKey - Deepgram API key (defaults to `DEEPGRAM_API_KEY` env var)\n * @param opts.model - Model to use (default: `flux-general-en`)\n * @param opts.eagerEotThreshold - Threshold (0.3-0.9) for preemptive generation\n * @param opts.eotThreshold - End-of-turn detection threshold (default: 0.7)\n * @param opts.eotTimeoutMs - End-of-turn timeout in ms (default: 3000)\n * @param opts.keyterms - List of key terms to improve recognition\n * @param opts.tags - Tags for usage reporting (max 128 chars each)\n *\n * @throws Error if no API key is provided\n */\n constructor(opts: Partial<STTv2Options> = {}) {\n super({\n streaming: true,\n interimResults: true,\n });\n\n this.#opts = { ...defaultSTTv2Options, ...opts };\n\n const apiKey = opts.apiKey || process.env.DEEPGRAM_API_KEY;\n if (!apiKey) {\n throw new Error('Deepgram API key is required');\n }\n this.#apiKey = apiKey;\n\n if (this.#opts.tags) {\n this.#opts.tags = validateTags(this.#opts.tags);\n }\n }\n\n /** The model being used for transcription */\n get model(): string {\n return this.#opts.model;\n }\n\n /** The STT provider name */\n get provider(): string {\n return 'Deepgram';\n }\n\n protected async _recognize(\n _frame: AudioFrame | AudioFrame[],\n _abortSignal?: AbortSignal,\n ): Promise<stt.SpeechEvent> {\n throw new Error('V2 API does not support non-streaming recognize. Use .stream()');\n }\n\n /**\n * Create a new streaming transcription session.\n *\n * @param options - Stream options\n * @returns A SpeechStream that emits transcription events\n */\n stream(options?: { connOptions?: APIConnectOptions }): stt.SpeechStream {\n const streamOpts = { ...this.#opts, apiKey: this.#apiKey };\n return new SpeechStreamv2(this, streamOpts, options?.connOptions);\n }\n\n /**\n * Update STT options. Changes will take effect on the next stream.\n *\n * @param opts - Partial options to update\n */\n updateOptions(opts: Partial<STTv2Options>) {\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n this.#logger.debug('Updated STTv2 options');\n }\n}\n\n// --- Stream Implementation ---\n\nclass SpeechStreamv2 extends stt.SpeechStream {\n readonly label = 'deepgram.SpeechStreamv2';\n #opts: STTv2Options & { apiKey: string };\n #logger = log();\n #ws: WebSocket | null = null;\n\n #audioDurationCollector: PeriodicCollector<number>;\n #requestId = '';\n #speaking = false;\n\n // Parity: _reconnect_event - using existing Event class from @livekit/agents\n #reconnectEvent = new Event();\n\n constructor(\n sttInstance: STTv2,\n opts: STTv2Options & { apiKey: string },\n connOptions?: APIConnectOptions,\n ) {\n super(sttInstance, opts.sampleRate, connOptions);\n this.#opts = opts;\n\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.#onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n updateOptions(opts: Partial<STTv2Options>) {\n this.#logger.debug('Stream received option update', opts);\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n\n // Trigger reconnection loop\n this.#reconnectEvent.set();\n }\n\n protected async run() {\n // Outer Loop: Handles reconnections (Configuration updates)\n while (!this.closed) {\n try {\n this.#reconnectEvent.clear();\n\n const url = this.#getDeepgramUrl();\n this.#logger.debug(`Connecting to Deepgram: ${url}`);\n\n this.#ws = new WebSocket(url, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n // 1. Wait for Connection Open\n await new Promise<void>((resolve, reject) => {\n if (!this.#ws) return reject(new Error('WebSocket not initialized'));\n\n const onOpen = () => {\n this.#ws?.off('error', onError);\n resolve();\n };\n const onError = (err: Error) => {\n this.#ws?.off('open', onOpen);\n reject(err);\n };\n\n this.#ws.once('open', onOpen);\n this.#ws.once('error', onError);\n });\n\n // 2. Run Concurrent Tasks (Send & Receive)\n const sendPromise = this.#sendTask();\n const recvPromise = this.#recvTask();\n const reconnectWait = this.#reconnectEvent.wait();\n\n // 3. Race: Normal Completion vs Reconnect Signal\n const result = await Promise.race([\n Promise.all([sendPromise, recvPromise]),\n reconnectWait.then(() => 'RECONNECT'),\n ]);\n\n if (result === 'RECONNECT') {\n this.#logger.debug('Reconnecting stream due to option update...');\n // Close current socket; loop will restart and open a new one\n this.#ws.close();\n } else {\n // Normal finish (Stream ended or Error thrown)\n break;\n }\n } catch (error) {\n this.#logger.error('Deepgram stream error', { error });\n throw error; // Let Base Class handle retry logic\n } finally {\n if (this.#ws?.readyState === WebSocket.OPEN) {\n this.#ws.close();\n }\n }\n }\n this.close();\n }\n\n async #sendTask() {\n if (!this.#ws) return;\n\n // Buffer audio into 50ms chunks (Parity)\n const samples50ms = Math.floor(this.#opts.sampleRate / 20);\n const audioBstream = new AudioByteStream(this.#opts.sampleRate, 1, samples50ms);\n\n let hasEnded = false;\n\n // Manual Iterator to allow racing against Reconnect Signal\n const iterator = this.input[Symbol.asyncIterator]();\n\n while (true) {\n const nextPromise = iterator.next();\n // If reconnect signal fires, abort the wait\n const abortPromise = this.#reconnectEvent.wait().then(() => ({ abort: true }) as const);\n\n const result = await Promise.race([nextPromise, abortPromise]);\n\n // Check if we need to abort (Reconnect) or if stream is done\n if ('abort' in result || result.done) {\n if (!('abort' in result) && result.done) {\n // Normal stream end\n hasEnded = true;\n } else {\n // Reconnect triggered - break loop immediately\n break;\n }\n }\n\n // If we broke above, we don't process data. If not, 'result' is IteratorResult\n if (hasEnded && !('value' in result)) {\n // Process flush below\n } else if ('value' in result) {\n const data = result.value;\n const frames: AudioFrame[] = [];\n\n if (data === SpeechStreamv2.FLUSH_SENTINEL) {\n frames.push(...audioBstream.flush());\n hasEnded = true;\n } else {\n frames.push(...audioBstream.write((data as AudioFrame).data.buffer as ArrayBuffer));\n }\n\n for (const frame of frames) {\n this.#audioDurationCollector.push(calculateAudioDurationSeconds(frame));\n\n if (this.#ws!.readyState === WebSocket.OPEN) {\n this.#ws!.send(frame.data);\n }\n\n if (hasEnded) {\n this.#audioDurationCollector.flush();\n hasEnded = false;\n }\n }\n }\n\n if (hasEnded) break;\n }\n\n // Only send CloseStream if we are exiting normally (not reconnecting)\n if (!this.#reconnectEvent.isSet && this.#ws!.readyState === WebSocket.OPEN) {\n this.#logger.debug('Sending CloseStream message to Deepgram');\n this.#ws!.send(_CLOSE_MSG);\n }\n }\n\n async #recvTask() {\n if (!this.#ws) return;\n\n return new Promise<void>((resolve) => {\n if (!this.#ws) return resolve();\n\n this.#ws.on('message', (data: Buffer, isBinary: boolean) => {\n if (isBinary) {\n this.#logger.warn('Received unexpected binary message from Deepgram');\n return;\n }\n try {\n const msg = JSON.parse(data.toString());\n this.#processStreamEvent(msg);\n } catch (error) {\n this.#logger.error('Failed to parse Deepgram message', { error });\n }\n });\n\n this.#ws.on('close', (code, reason) => {\n this.#logger.debug(`Deepgram WebSocket closed: ${code} ${reason}`);\n resolve();\n });\n\n // Errors are caught by run() listener, resolve here to clean up task\n this.#ws.on('error', () => resolve());\n });\n }\n\n #processStreamEvent(data: Record<string, unknown>) {\n if (data.request_id) {\n this.#requestId = data.request_id as string;\n }\n\n if (data.type === 'TurnInfo') {\n const eventType = data.event;\n\n if (eventType === 'StartOfTurn') {\n if (this.#speaking) return;\n\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n requestId: this.#requestId,\n });\n\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'Update') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EagerEndOfTurn') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.PREFLIGHT_TRANSCRIPT, data);\n } else if (eventType === 'TurnResumed') {\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EndOfTurn') {\n if (!this.#speaking) return;\n\n this.#speaking = false;\n this.#sendTranscriptEvent(stt.SpeechEventType.FINAL_TRANSCRIPT, data);\n\n this.queue.put({\n type: stt.SpeechEventType.END_OF_SPEECH,\n requestId: this.#requestId,\n });\n }\n } else if (data.type === 'Error') {\n this.#logger.warn('deepgram sent an error', { data });\n const desc = (data.description as string) || 'unknown error from deepgram';\n throw new Error(`Deepgram API Error: ${desc}`);\n }\n }\n\n #sendTranscriptEvent(eventType: stt.SpeechEventType, data: Record<string, unknown>) {\n // Note: start_time_offset is not yet available in the TypeScript base class\n // Using 0.0 for now - full parity would require base class changes\n const alts = parseTranscription(this.#opts.language || 'en', data, 0.0);\n\n if (alts.length > 0) {\n this.queue.put({\n type: eventType,\n requestId: this.#requestId,\n alternatives: [alts[0]!, ...alts.slice(1)],\n });\n }\n }\n\n #onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n\n #getDeepgramUrl(): string {\n const params: Record<string, string | string[]> = {\n model: this.#opts.model,\n sample_rate: this.#opts.sampleRate.toString(),\n encoding: 'linear16',\n mip_opt_out: String(this.#opts.mipOptOut),\n };\n\n if (this.#opts.language) params.language = this.#opts.language;\n if (this.#opts.eagerEotThreshold)\n params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();\n if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();\n if (this.#opts.eotTimeoutMs) params.eot_timeout_ms = this.#opts.eotTimeoutMs.toString();\n\n if (this.#opts.keyterms.length > 0) params.keyterm = this.#opts.keyterms;\n if (this.#opts.tags && this.#opts.tags.length > 0) params.tag = this.#opts.tags;\n\n const baseUrl = this.#opts.endpointUrl.replace(/^http/, 'ws');\n const qs = queryString.stringify(params);\n return `${baseUrl}?${qs}`;\n }\n\n override close() {\n super.close();\n this.#ws?.close();\n }\n}\n\n// --- Helpers ---\n\nfunction parseTranscription(\n language: string,\n data: Record<string, unknown>,\n startTimeOffset: number,\n): stt.SpeechData[] {\n const transcript = data.transcript as string | undefined;\n const words = (data.words as Array<Record<string, unknown>>) || [];\n\n if (!words || words.length === 0) {\n return [];\n }\n\n let confidence = 0;\n if (words.length > 0) {\n const sum = words.reduce((acc: number, w) => acc + ((w.confidence as number) || 0), 0);\n confidence = sum / words.length;\n }\n\n const sd: stt.SpeechData = {\n language: language,\n startTime: ((data.audio_window_start as number) || 0) + startTimeOffset,\n endTime: ((data.audio_window_end as number) || 0) + startTimeOffset,\n confidence: confidence,\n text: transcript || '',\n };\n\n return [sd];\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,YAAY,iBAAiB;AAC7B,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,aAAa,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC;AAqBzD,MAAM,sBAAoD;AAAA,EACxD,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,UAAU,CAAC;AAAA,EACX,aAAa;AAAA,EACb,UAAU;AAAA,EACV,WAAW;AACb;AAEA,SAAS,aAAa,MAA0B;AAC9C,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,SAAS,KAAK;AACpB,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC3D;AAAA,EACF;AACA,SAAO;AACT;AAiCO,MAAM,cAAc,IAAI,IAAI;AAAA,EACxB,QAAQ;AAAA,EACjB;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBd,YAAY,OAA8B,CAAC,GAAG;AAC5C,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB;AAAA,IAClB,CAAC;AAED,SAAK,QAAQ,EAAE,GAAG,qBAAqB,GAAG,KAAK;AAE/C,UAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,8BAA8B;AAAA,IAChD;AACA,SAAK,UAAU;AAEf,QAAI,KAAK,MAAM,MAAM;AACnB,WAAK,MAAM,OAAO,aAAa,KAAK,MAAM,IAAI;AAAA,IAChD;AAAA,EACF;AAAA;AAAA,EAGA,IAAI,QAAgB;AAClB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,MAAgB,WACd,QACA,cAC0B;AAC1B,UAAM,IAAI,MAAM,gEAAgE;AAAA,EAClF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,OAAO,SAAiE;AACtE,UAAM,aAAa,EAAE,GAAG,KAAK,OAAO,QAAQ,KAAK,QAAQ;AACzD,WAAO,IAAI,eAAe,MAAM,YAAY,mCAAS,WAAW;AAAA,EAClE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6B;AACzC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AACvD,SAAK,QAAQ,MAAM,uBAAuB;AAAA,EAC5C;AACF;AAIA,MAAM,uBAAuB,IAAI,aAAa;AAAA,EACnC,QAAQ;AAAA,EACjB;AAAA,EACA,UAAU,IAAI;AAAA,EACd,MAAwB;AAAA,EAExB;AAAA,EACA,aAAa;AAAA,EACb,YAAY;AAAA;AAAA,EAGZ,kBAAkB,IAAI,MAAM;AAAA,EAE5B,YACE,aACA,MACA,aACA;AACA,UAAM,aAAa,KAAK,YAAY,WAAW;AAC/C,SAAK,QAAQ;AAEb,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,uBAAuB,QAAQ;AAAA,MAClD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,cAAc,MAA6B;AACzC,SAAK,QAAQ,MAAM,iCAAiC,IAAI;AACxD,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AAGvD,SAAK,gBAAgB,IAAI;AAAA,EAC3B;AAAA,EAEA,MAAgB,MAAM;AA5MxB;AA8MI,WAAO,CAAC,KAAK,QAAQ;AACnB,UAAI;AACF,aAAK,gBAAgB,MAAM;AAE3B,cAAM,MAAM,KAAK,gBAAgB;AACjC,aAAK,QAAQ,MAAM,2BAA2B,GAAG,EAAE;AAEnD,aAAK,MAAM,IAAI,UAAU,KAAK;AAAA,UAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,QACzD,CAAC;AAGD,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,cAAI,CAAC,KAAK,IAAK,QAAO,OAAO,IAAI,MAAM,2BAA2B,CAAC;AAEnE,gBAAM,SAAS,MAAM;AA7N/B,gBAAAA;AA8NY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,SAAS;AACvB,oBAAQ;AAAA,UACV;AACA,gBAAM,UAAU,CAAC,QAAe;AAjO1C,gBAAAA;AAkOY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,QAAQ;AACtB,mBAAO,GAAG;AAAA,UACZ;AAEA,eAAK,IAAI,KAAK,QAAQ,MAAM;AAC5B,eAAK,IAAI,KAAK,SAAS,OAAO;AAAA,QAChC,CAAC;AAGD,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,gBAAgB,KAAK,gBAAgB,KAAK;AAGhD,cAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,UAChC,QAAQ,IAAI,CAAC,aAAa,WAAW,CAAC;AAAA,UACtC,cAAc,KAAK,MAAM,WAAW;AAAA,QACtC,CAAC;AAED,YAAI,WAAW,aAAa;AAC1B,eAAK,QAAQ,MAAM,6CAA6C;AAEhE,eAAK,IAAI,MAAM;AAAA,QACjB,OAAO;AAEL;AAAA,QACF;AAAA,MACF,SAAS,OAAO;AACd,aAAK,QAAQ,MAAM,yBAAyB,EAAE,MAAM,CAAC;AACrD,cAAM;AAAA,MACR,UAAE;AACA,cAAI,UAAK,QAAL,mBAAU,gBAAe,UAAU,MAAM;AAC3C,eAAK,IAAI,MAAM;AAAA,QACjB;AAAA,MACF;AAAA,IACF;AACA,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAGf,UAAM,cAAc,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AACzD,UAAM,eAAe,IAAI,gBAAgB,KAAK,MAAM,YAAY,GAAG,WAAW;AAE9E,QAAI,WAAW;AAGf,UAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAElD,WAAO,MAAM;AACX,YAAM,cAAc,SAAS,KAAK;AAElC,YAAM,eAAe,KAAK,gBAAgB,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,KAAK,EAAW;AAEtF,YAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,aAAa,YAAY,CAAC;AAG7D,UAAI,WAAW,UAAU,OAAO,MAAM;AACpC,YAAI,EAAE,WAAW,WAAW,OAAO,MAAM;AAEvC,qBAAW;AAAA,QACb,OAAO;AAEL;AAAA,QACF;AAAA,MACF;AAGA,UAAI,YAAY,EAAE,WAAW,SAAS;AAAA,MAEtC,WAAW,WAAW,QAAQ;AAC5B,cAAM,OAAO,OAAO;AACpB,cAAM,SAAuB,CAAC;AAE9B,YAAI,SAAS,eAAe,gBAAgB;AAC1C,iBAAO,KAAK,GAAG,aAAa,MAAM,CAAC;AACnC,qBAAW;AAAA,QACb,OAAO;AACL,iBAAO,KAAK,GAAG,aAAa,MAAO,KAAoB,KAAK,MAAqB,CAAC;AAAA,QACpF;AAEA,mBAAW,SAAS,QAAQ;AAC1B,eAAK,wBAAwB,KAAK,8BAA8B,KAAK,CAAC;AAEtE,cAAI,KAAK,IAAK,eAAe,UAAU,MAAM;AAC3C,iBAAK,IAAK,KAAK,MAAM,IAAI;AAAA,UAC3B;AAEA,cAAI,UAAU;AACZ,iBAAK,wBAAwB,MAAM;AACnC,uBAAW;AAAA,UACb;AAAA,QACF;AAAA,MACF;AAEA,UAAI,SAAU;AAAA,IAChB;AAGA,QAAI,CAAC,KAAK,gBAAgB,SAAS,KAAK,IAAK,eAAe,UAAU,MAAM;AAC1E,WAAK,QAAQ,MAAM,yCAAyC;AAC5D,WAAK,IAAK,KAAK,UAAU;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAEf,WAAO,IAAI,QAAc,CAAC,YAAY;AACpC,UAAI,CAAC,KAAK,IAAK,QAAO,QAAQ;AAE9B,WAAK,IAAI,GAAG,WAAW,CAAC,MAAc,aAAsB;AAC1D,YAAI,UAAU;AACZ,eAAK,QAAQ,KAAK,kDAAkD;AACpE;AAAA,QACF;AACA,YAAI;AACF,gBAAM,MAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AACtC,eAAK,oBAAoB,GAAG;AAAA,QAC9B,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,oCAAoC,EAAE,MAAM,CAAC;AAAA,QAClE;AAAA,MACF,CAAC;AAED,WAAK,IAAI,GAAG,SAAS,CAAC,MAAM,WAAW;AACrC,aAAK,QAAQ,MAAM,8BAA8B,IAAI,IAAI,MAAM,EAAE;AACjE,gBAAQ;AAAA,MACV,CAAC;AAGD,WAAK,IAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAAA,IACtC,CAAC;AAAA,EACH;AAAA,EAEA,oBAAoB,MAA+B;AACjD,QAAI,KAAK,YAAY;AACnB,WAAK,aAAa,KAAK;AAAA,IACzB;AAEA,QAAI,KAAK,SAAS,YAAY;AAC5B,YAAM,YAAY,KAAK;AAEvB,UAAI,cAAc,eAAe;AAC/B,YAAI,KAAK,UAAW;AAEpB,aAAK,YAAY;AACjB,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,IAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAED,aAAK,qBAAqB,IAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,UAAU;AACjC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,IAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,kBAAkB;AACzC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,IAAI,gBAAgB,sBAAsB,IAAI;AAAA,MAC1E,WAAW,cAAc,eAAe;AACtC,aAAK,qBAAqB,IAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,aAAa;AACpC,YAAI,CAAC,KAAK,UAAW;AAErB,aAAK,YAAY;AACjB,aAAK,qBAAqB,IAAI,gBAAgB,kBAAkB,IAAI;AAEpE,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,IAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAAA,MACH;AAAA,IACF,WAAW,KAAK,SAAS,SAAS;AAChC,WAAK,QAAQ,KAAK,0BAA0B,EAAE,KAAK,CAAC;AACpD,YAAM,OAAQ,KAAK,eAA0B;AAC7C,YAAM,IAAI,MAAM,uBAAuB,IAAI,EAAE;AAAA,IAC/C;AAAA,EACF;AAAA,EAEA,qBAAqB,WAAgC,MAA+B;AAGlF,UAAM,OAAO,mBAAmB,KAAK,MAAM,YAAY,MAAM,MAAM,CAAG;AAEtE,QAAI,KAAK,SAAS,GAAG;AACnB,WAAK,MAAM,IAAI;AAAA,QACb,MAAM;AAAA,QACN,WAAW,KAAK;AAAA,QAChB,cAAc,CAAC,KAAK,CAAC,GAAI,GAAG,KAAK,MAAM,CAAC,CAAC;AAAA,MAC3C,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,uBAAuB,UAAkB;AACvC,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AAAA,EAEA,kBAA0B;AACxB,UAAM,SAA4C;AAAA,MAChD,OAAO,KAAK,MAAM;AAAA,MAClB,aAAa,KAAK,MAAM,WAAW,SAAS;AAAA,MAC5C,UAAU;AAAA,MACV,aAAa,OAAO,KAAK,MAAM,SAAS;AAAA,IAC1C;AAEA,QAAI,KAAK,MAAM,SAAU,QAAO,WAAW,KAAK,MAAM;AACtD,QAAI,KAAK,MAAM;AACb,aAAO,sBAAsB,KAAK,MAAM,kBAAkB,SAAS;AACrE,QAAI,KAAK,MAAM,aAAc,QAAO,gBAAgB,KAAK,MAAM,aAAa,SAAS;AACrF,QAAI,KAAK,MAAM,aAAc,QAAO,iBAAiB,KAAK,MAAM,aAAa,SAAS;AAEtF,QAAI,KAAK,MAAM,SAAS,SAAS,EAAG,QAAO,UAAU,KAAK,MAAM;AAChE,QAAI,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,SAAS,EAAG,QAAO,MAAM,KAAK,MAAM;AAE3E,UAAM,UAAU,KAAK,MAAM,YAAY,QAAQ,SAAS,IAAI;AAC5D,UAAM,KAAK,YAAY,UAAU,MAAM;AACvC,WAAO,GAAG,OAAO,IAAI,EAAE;AAAA,EACzB;AAAA,EAES,QAAQ;AArcnB;AAscI,UAAM,MAAM;AACZ,eAAK,QAAL,mBAAU;AAAA,EACZ;AACF;AAIA,SAAS,mBACP,UACA,MACA,iBACkB;AAClB,QAAM,aAAa,KAAK;AACxB,QAAM,QAAS,KAAK,SAA4C,CAAC;AAEjE,MAAI,CAAC,SAAS,MAAM,WAAW,GAAG;AAChC,WAAO,CAAC;AAAA,EACV;AAEA,MAAI,aAAa;AACjB,MAAI,MAAM,SAAS,GAAG;AACpB,UAAM,MAAM,MAAM,OAAO,CAAC,KAAa,MAAM,OAAQ,EAAE,cAAyB,IAAI,CAAC;AACrF,iBAAa,MAAM,MAAM;AAAA,EAC3B;AAEA,QAAM,KAAqB;AAAA,IACzB;AAAA,IACA,YAAa,KAAK,sBAAiC,KAAK;AAAA,IACxD,UAAW,KAAK,oBAA+B,KAAK;AAAA,IACpD;AAAA,IACA,MAAM,cAAc;AAAA,EACtB;AAEA,SAAO,CAAC,EAAE;AACZ;","names":["_a"]}
1
+ {"version":3,"sources":["../src/stt_v2.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n Event,\n calculateAudioDurationSeconds,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport * as queryString from 'node:querystring';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { V2Models } from './models.js';\n\nconst _CLOSE_MSG = JSON.stringify({ type: 'CloseStream' });\n\n// --- Configuration ---\n\n/**\n * Configuration options for STTv2 (Deepgram Flux model).\n */\nexport interface STTv2Options {\n apiKey?: string;\n model: V2Models | string;\n sampleRate: number;\n keyterms: string[];\n endpointUrl: string;\n language?: string;\n eagerEotThreshold?: number;\n eotThreshold?: number;\n eotTimeoutMs?: number;\n mipOptOut?: boolean;\n tags?: string[];\n}\n\nconst defaultSTTv2Options: Omit<STTv2Options, 'apiKey'> = {\n model: 'flux-general-en',\n sampleRate: 16000,\n keyterms: [],\n endpointUrl: 'wss://api.deepgram.com/v2/listen',\n language: 'en',\n mipOptOut: false,\n};\n\nfunction validateTags(tags: string[]): string[] {\n for (const tag of tags) {\n if (tag.length > 128) {\n throw new Error('tag must be no more than 128 characters');\n }\n }\n return tags;\n}\n\n/**\n * Deepgram STTv2 using the Flux model for streaming speech-to-text.\n *\n * This uses Deepgram's V2 API (`/v2/listen`) which provides turn-based\n * transcription with support for preemptive generation.\n *\n * @remarks\n * Key differences from STT (V1):\n * - Uses `TurnInfo` events instead of `SpeechStarted`/`Results`\n * - Supports `eagerEotThreshold` for preemptive LLM generation\n * - Sends `PREFLIGHT_TRANSCRIPT` events when eager end-of-turn is detected\n *\n * @example\n * ```typescript\n * import { STTv2 } from '@livekit/agents-plugin-deepgram';\n *\n * const stt = new STTv2({\n * model: 'flux-general-en',\n * eagerEotThreshold: 0.5, // Enable preemptive generation\n * });\n *\n * const stream = stt.stream();\n * stream.pushFrame(audioFrame);\n *\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives?.[0]?.text);\n * }\n * }\n * ```\n */\nexport class STTv2 extends stt.STT {\n readonly label = 'deepgram.STTv2';\n #opts: STTv2Options;\n #apiKey: string;\n #logger = log();\n\n /**\n * Create a new Deepgram STTv2 instance.\n *\n * @param opts - Configuration options\n * @param opts.apiKey - Deepgram API key (defaults to `DEEPGRAM_API_KEY` env var)\n * @param opts.model - Model to use (default: `flux-general-en`)\n * @param opts.eagerEotThreshold - Threshold (0.3-0.9) for preemptive generation\n * @param opts.eotThreshold - End-of-turn detection threshold (default: 0.7)\n * @param opts.eotTimeoutMs - End-of-turn timeout in ms (default: 3000)\n * @param opts.keyterms - List of key terms to improve recognition\n * @param opts.tags - Tags for usage reporting (max 128 chars each)\n *\n * @throws Error if no API key is provided\n */\n constructor(opts: Partial<STTv2Options> = {}) {\n super({\n streaming: true,\n interimResults: true,\n alignedTranscript: 'word',\n });\n\n this.#opts = { ...defaultSTTv2Options, ...opts };\n\n const apiKey = opts.apiKey || process.env.DEEPGRAM_API_KEY;\n if (!apiKey) {\n throw new Error('Deepgram API key is required');\n }\n this.#apiKey = apiKey;\n\n if (this.#opts.tags) {\n this.#opts.tags = validateTags(this.#opts.tags);\n }\n }\n\n /** The model being used for transcription */\n get model(): string {\n return this.#opts.model;\n }\n\n /** The STT provider name */\n get provider(): string {\n return 'Deepgram';\n }\n\n protected async _recognize(\n _frame: AudioFrame | AudioFrame[],\n _abortSignal?: AbortSignal,\n ): Promise<stt.SpeechEvent> {\n throw new Error('V2 API does not support non-streaming recognize. Use .stream()');\n }\n\n /**\n * Create a new streaming transcription session.\n *\n * @param options - Stream options\n * @returns A SpeechStream that emits transcription events\n */\n stream(options?: { connOptions?: APIConnectOptions }): stt.SpeechStream {\n const streamOpts = { ...this.#opts, apiKey: this.#apiKey };\n return new SpeechStreamv2(this, streamOpts, options?.connOptions);\n }\n\n /**\n * Update STT options. Changes will take effect on the next stream.\n *\n * @param opts - Partial options to update\n */\n updateOptions(opts: Partial<STTv2Options>) {\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n this.#logger.debug('Updated STTv2 options');\n }\n}\n\n// --- Stream Implementation ---\n\nclass SpeechStreamv2 extends stt.SpeechStream {\n readonly label = 'deepgram.SpeechStreamv2';\n #opts: STTv2Options & { apiKey: string };\n #logger = log();\n #ws: WebSocket | null = null;\n\n #audioDurationCollector: PeriodicCollector<number>;\n #requestId = '';\n #speaking = false;\n\n // Parity: _reconnect_event - using existing Event class from @livekit/agents\n #reconnectEvent = new Event();\n\n constructor(\n sttInstance: STTv2,\n opts: STTv2Options & { apiKey: string },\n connOptions?: APIConnectOptions,\n ) {\n super(sttInstance, opts.sampleRate, connOptions);\n this.#opts = opts;\n\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.#onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n updateOptions(opts: Partial<STTv2Options>) {\n this.#logger.debug('Stream received option update', opts);\n this.#opts = { ...this.#opts, ...opts };\n if (opts.tags) this.#opts.tags = validateTags(opts.tags);\n\n // Trigger reconnection loop\n this.#reconnectEvent.set();\n }\n\n protected async run() {\n // Outer Loop: Handles reconnections (Configuration updates)\n while (!this.closed) {\n try {\n this.#reconnectEvent.clear();\n\n const url = this.#getDeepgramUrl();\n this.#logger.debug(`Connecting to Deepgram: ${url}`);\n\n this.#ws = new WebSocket(url, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n // 1. Wait for Connection Open\n await new Promise<void>((resolve, reject) => {\n if (!this.#ws) return reject(new Error('WebSocket not initialized'));\n\n const onOpen = () => {\n this.#ws?.off('error', onError);\n resolve();\n };\n const onError = (err: Error) => {\n this.#ws?.off('open', onOpen);\n reject(err);\n };\n\n this.#ws.once('open', onOpen);\n this.#ws.once('error', onError);\n });\n\n // 2. Run Concurrent Tasks (Send & Receive)\n const sendPromise = this.#sendTask();\n const recvPromise = this.#recvTask();\n const reconnectWait = this.#reconnectEvent.wait();\n\n // 3. Race: Normal Completion vs Reconnect Signal\n const result = await Promise.race([\n Promise.all([sendPromise, recvPromise]),\n reconnectWait.then(() => 'RECONNECT'),\n ]);\n\n if (result === 'RECONNECT') {\n this.#logger.debug('Reconnecting stream due to option update...');\n // Close current socket; loop will restart and open a new one\n this.#ws.close();\n } else {\n // Normal finish (Stream ended or Error thrown)\n break;\n }\n } catch (error) {\n this.#logger.error('Deepgram stream error', { error });\n throw error; // Let Base Class handle retry logic\n } finally {\n if (this.#ws?.readyState === WebSocket.OPEN) {\n this.#ws.close();\n }\n }\n }\n this.close();\n }\n\n async #sendTask() {\n if (!this.#ws) return;\n\n // Buffer audio into 50ms chunks (Parity)\n const samples50ms = Math.floor(this.#opts.sampleRate / 20);\n const audioBstream = new AudioByteStream(this.#opts.sampleRate, 1, samples50ms);\n\n let hasEnded = false;\n\n // Manual Iterator to allow racing against Reconnect Signal\n const iterator = this.input[Symbol.asyncIterator]();\n\n while (true) {\n const nextPromise = iterator.next();\n // If reconnect signal fires, abort the wait\n const abortPromise = this.#reconnectEvent.wait().then(() => ({ abort: true }) as const);\n\n const result = await Promise.race([nextPromise, abortPromise]);\n\n // Check if we need to abort (Reconnect) or if stream is done\n if ('abort' in result || result.done) {\n if (!('abort' in result) && result.done) {\n // Normal stream end\n hasEnded = true;\n } else {\n // Reconnect triggered - break loop immediately\n break;\n }\n }\n\n // If we broke above, we don't process data. If not, 'result' is IteratorResult\n if (hasEnded && !('value' in result)) {\n // Process flush below\n } else if ('value' in result) {\n const data = result.value;\n const frames: AudioFrame[] = [];\n\n if (data === SpeechStreamv2.FLUSH_SENTINEL) {\n frames.push(...audioBstream.flush());\n hasEnded = true;\n } else {\n frames.push(...audioBstream.write((data as AudioFrame).data.buffer as ArrayBuffer));\n }\n\n for (const frame of frames) {\n this.#audioDurationCollector.push(calculateAudioDurationSeconds(frame));\n\n if (this.#ws!.readyState === WebSocket.OPEN) {\n this.#ws!.send(frame.data);\n }\n\n if (hasEnded) {\n this.#audioDurationCollector.flush();\n hasEnded = false;\n }\n }\n }\n\n if (hasEnded) break;\n }\n\n // Only send CloseStream if we are exiting normally (not reconnecting)\n if (!this.#reconnectEvent.isSet && this.#ws!.readyState === WebSocket.OPEN) {\n this.#logger.debug('Sending CloseStream message to Deepgram');\n this.#ws!.send(_CLOSE_MSG);\n }\n }\n\n async #recvTask() {\n if (!this.#ws) return;\n\n return new Promise<void>((resolve) => {\n if (!this.#ws) return resolve();\n\n this.#ws.on('message', (data: Buffer, isBinary: boolean) => {\n if (isBinary) {\n this.#logger.warn('Received unexpected binary message from Deepgram');\n return;\n }\n try {\n const msg = JSON.parse(data.toString());\n this.#processStreamEvent(msg);\n } catch (error) {\n this.#logger.error('Failed to parse Deepgram message', { error });\n }\n });\n\n this.#ws.on('close', (code, reason) => {\n this.#logger.debug(`Deepgram WebSocket closed: ${code} ${reason}`);\n resolve();\n });\n\n // Errors are caught by run() listener, resolve here to clean up task\n this.#ws.on('error', () => resolve());\n });\n }\n\n #processStreamEvent(data: Record<string, unknown>) {\n if (data.request_id) {\n this.#requestId = data.request_id as string;\n }\n\n if (data.type === 'TurnInfo') {\n const eventType = data.event;\n\n if (eventType === 'StartOfTurn') {\n if (this.#speaking) return;\n\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n requestId: this.#requestId,\n });\n\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'Update') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EagerEndOfTurn') {\n if (!this.#speaking) return;\n this.#sendTranscriptEvent(stt.SpeechEventType.PREFLIGHT_TRANSCRIPT, data);\n } else if (eventType === 'TurnResumed') {\n this.#sendTranscriptEvent(stt.SpeechEventType.INTERIM_TRANSCRIPT, data);\n } else if (eventType === 'EndOfTurn') {\n if (!this.#speaking) return;\n\n this.#speaking = false;\n this.#sendTranscriptEvent(stt.SpeechEventType.FINAL_TRANSCRIPT, data);\n\n this.queue.put({\n type: stt.SpeechEventType.END_OF_SPEECH,\n requestId: this.#requestId,\n });\n }\n } else if (data.type === 'Error') {\n this.#logger.warn('deepgram sent an error', { data });\n const desc = (data.description as string) || 'unknown error from deepgram';\n throw new Error(`Deepgram API Error: ${desc}`);\n }\n }\n\n #sendTranscriptEvent(eventType: stt.SpeechEventType, data: Record<string, unknown>) {\n const alts = parseTranscription(this.#opts.language || 'en', data, this.startTimeOffset);\n\n if (alts.length > 0) {\n this.queue.put({\n type: eventType,\n requestId: this.#requestId,\n alternatives: [alts[0]!, ...alts.slice(1)],\n });\n }\n }\n\n #onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n\n #getDeepgramUrl(): string {\n const params: Record<string, string | string[]> = {\n model: this.#opts.model,\n sample_rate: this.#opts.sampleRate.toString(),\n encoding: 'linear16',\n mip_opt_out: String(this.#opts.mipOptOut),\n };\n\n // Note: v2 API does NOT include 'language' parameter\n if (this.#opts.eagerEotThreshold)\n params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();\n if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();\n if (this.#opts.eotTimeoutMs) params.eot_timeout_ms = this.#opts.eotTimeoutMs.toString();\n\n if (this.#opts.keyterms.length > 0) params.keyterm = this.#opts.keyterms;\n if (this.#opts.tags && this.#opts.tags.length > 0) params.tag = this.#opts.tags;\n\n const baseUrl = this.#opts.endpointUrl.replace(/^http/, 'ws');\n const qs = queryString.stringify(params);\n return `${baseUrl}?${qs}`;\n }\n\n override close() {\n super.close();\n this.#ws?.close();\n }\n}\n\n// --- Helpers ---\n\nfunction parseTranscription(\n language: string,\n data: Record<string, unknown>,\n startTimeOffset: number,\n): stt.SpeechData[] {\n const transcript = data.transcript as string | undefined;\n const wordsData = (data.words as Array<Record<string, unknown>>) || [];\n\n if (!wordsData || wordsData.length === 0) {\n return [];\n }\n\n let confidence = 0;\n if (wordsData.length > 0) {\n const sum = wordsData.reduce((acc: number, w) => acc + ((w.confidence as number) || 0), 0);\n confidence = sum / wordsData.length;\n }\n\n const sd: stt.SpeechData = {\n language: language,\n startTime: ((data.audio_window_start as number) || 0) + startTimeOffset,\n endTime: ((data.audio_window_end as number) || 0) + startTimeOffset,\n confidence: confidence,\n text: transcript || '',\n // Note: Deepgram V2 (Flux) API does not provide word-level timing (start/end).\n // Words only contain 'word' and 'confidence' fields, so startTime/endTime will be 0.\n // See: https://developers.deepgram.com/docs/flux/nova-3-migration\n words: wordsData.map((word) => ({\n text: (word.word as string) ?? '',\n startTime: ((word.start as number) ?? 0) + startTimeOffset,\n endTime: ((word.end as number) ?? 0) + startTimeOffset,\n confidence: (word.confidence as number) ?? 0.0,\n startTimeOffset,\n })),\n };\n\n return [sd];\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,YAAY,iBAAiB;AAC7B,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,aAAa,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC;AAqBzD,MAAM,sBAAoD;AAAA,EACxD,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,UAAU,CAAC;AAAA,EACX,aAAa;AAAA,EACb,UAAU;AAAA,EACV,WAAW;AACb;AAEA,SAAS,aAAa,MAA0B;AAC9C,aAAW,OAAO,MAAM;AACtB,QAAI,IAAI,SAAS,KAAK;AACpB,YAAM,IAAI,MAAM,yCAAyC;AAAA,IAC3D;AAAA,EACF;AACA,SAAO;AACT;AAiCO,MAAM,cAAc,IAAI,IAAI;AAAA,EACxB,QAAQ;AAAA,EACjB;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBd,YAAY,OAA8B,CAAC,GAAG;AAC5C,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB;AAAA,MAChB,mBAAmB;AAAA,IACrB,CAAC;AAED,SAAK,QAAQ,EAAE,GAAG,qBAAqB,GAAG,KAAK;AAE/C,UAAM,SAAS,KAAK,UAAU,QAAQ,IAAI;AAC1C,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI,MAAM,8BAA8B;AAAA,IAChD;AACA,SAAK,UAAU;AAEf,QAAI,KAAK,MAAM,MAAM;AACnB,WAAK,MAAM,OAAO,aAAa,KAAK,MAAM,IAAI;AAAA,IAChD;AAAA,EACF;AAAA;AAAA,EAGA,IAAI,QAAgB;AAClB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,IAAI,WAAmB;AACrB,WAAO;AAAA,EACT;AAAA,EAEA,MAAgB,WACd,QACA,cAC0B;AAC1B,UAAM,IAAI,MAAM,gEAAgE;AAAA,EAClF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,OAAO,SAAiE;AACtE,UAAM,aAAa,EAAE,GAAG,KAAK,OAAO,QAAQ,KAAK,QAAQ;AACzD,WAAO,IAAI,eAAe,MAAM,YAAY,mCAAS,WAAW;AAAA,EAClE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,cAAc,MAA6B;AACzC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AACvD,SAAK,QAAQ,MAAM,uBAAuB;AAAA,EAC5C;AACF;AAIA,MAAM,uBAAuB,IAAI,aAAa;AAAA,EACnC,QAAQ;AAAA,EACjB;AAAA,EACA,UAAU,IAAI;AAAA,EACd,MAAwB;AAAA,EAExB;AAAA,EACA,aAAa;AAAA,EACb,YAAY;AAAA;AAAA,EAGZ,kBAAkB,IAAI,MAAM;AAAA,EAE5B,YACE,aACA,MACA,aACA;AACA,UAAM,aAAa,KAAK,YAAY,WAAW;AAC/C,SAAK,QAAQ;AAEb,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,uBAAuB,QAAQ;AAAA,MAClD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,cAAc,MAA6B;AACzC,SAAK,QAAQ,MAAM,iCAAiC,IAAI;AACxD,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,QAAI,KAAK,KAAM,MAAK,MAAM,OAAO,aAAa,KAAK,IAAI;AAGvD,SAAK,gBAAgB,IAAI;AAAA,EAC3B;AAAA,EAEA,MAAgB,MAAM;AA7MxB;AA+MI,WAAO,CAAC,KAAK,QAAQ;AACnB,UAAI;AACF,aAAK,gBAAgB,MAAM;AAE3B,cAAM,MAAM,KAAK,gBAAgB;AACjC,aAAK,QAAQ,MAAM,2BAA2B,GAAG,EAAE;AAEnD,aAAK,MAAM,IAAI,UAAU,KAAK;AAAA,UAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,QACzD,CAAC;AAGD,cAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,cAAI,CAAC,KAAK,IAAK,QAAO,OAAO,IAAI,MAAM,2BAA2B,CAAC;AAEnE,gBAAM,SAAS,MAAM;AA9N/B,gBAAAA;AA+NY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,SAAS;AACvB,oBAAQ;AAAA,UACV;AACA,gBAAM,UAAU,CAAC,QAAe;AAlO1C,gBAAAA;AAmOY,aAAAA,MAAA,KAAK,QAAL,gBAAAA,IAAU,IAAI,QAAQ;AACtB,mBAAO,GAAG;AAAA,UACZ;AAEA,eAAK,IAAI,KAAK,QAAQ,MAAM;AAC5B,eAAK,IAAI,KAAK,SAAS,OAAO;AAAA,QAChC,CAAC;AAGD,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,cAAc,KAAK,UAAU;AACnC,cAAM,gBAAgB,KAAK,gBAAgB,KAAK;AAGhD,cAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,UAChC,QAAQ,IAAI,CAAC,aAAa,WAAW,CAAC;AAAA,UACtC,cAAc,KAAK,MAAM,WAAW;AAAA,QACtC,CAAC;AAED,YAAI,WAAW,aAAa;AAC1B,eAAK,QAAQ,MAAM,6CAA6C;AAEhE,eAAK,IAAI,MAAM;AAAA,QACjB,OAAO;AAEL;AAAA,QACF;AAAA,MACF,SAAS,OAAO;AACd,aAAK,QAAQ,MAAM,yBAAyB,EAAE,MAAM,CAAC;AACrD,cAAM;AAAA,MACR,UAAE;AACA,cAAI,UAAK,QAAL,mBAAU,gBAAe,UAAU,MAAM;AAC3C,eAAK,IAAI,MAAM;AAAA,QACjB;AAAA,MACF;AAAA,IACF;AACA,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAGf,UAAM,cAAc,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AACzD,UAAM,eAAe,IAAI,gBAAgB,KAAK,MAAM,YAAY,GAAG,WAAW;AAE9E,QAAI,WAAW;AAGf,UAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAElD,WAAO,MAAM;AACX,YAAM,cAAc,SAAS,KAAK;AAElC,YAAM,eAAe,KAAK,gBAAgB,KAAK,EAAE,KAAK,OAAO,EAAE,OAAO,KAAK,EAAW;AAEtF,YAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,aAAa,YAAY,CAAC;AAG7D,UAAI,WAAW,UAAU,OAAO,MAAM;AACpC,YAAI,EAAE,WAAW,WAAW,OAAO,MAAM;AAEvC,qBAAW;AAAA,QACb,OAAO;AAEL;AAAA,QACF;AAAA,MACF;AAGA,UAAI,YAAY,EAAE,WAAW,SAAS;AAAA,MAEtC,WAAW,WAAW,QAAQ;AAC5B,cAAM,OAAO,OAAO;AACpB,cAAM,SAAuB,CAAC;AAE9B,YAAI,SAAS,eAAe,gBAAgB;AAC1C,iBAAO,KAAK,GAAG,aAAa,MAAM,CAAC;AACnC,qBAAW;AAAA,QACb,OAAO;AACL,iBAAO,KAAK,GAAG,aAAa,MAAO,KAAoB,KAAK,MAAqB,CAAC;AAAA,QACpF;AAEA,mBAAW,SAAS,QAAQ;AAC1B,eAAK,wBAAwB,KAAK,8BAA8B,KAAK,CAAC;AAEtE,cAAI,KAAK,IAAK,eAAe,UAAU,MAAM;AAC3C,iBAAK,IAAK,KAAK,MAAM,IAAI;AAAA,UAC3B;AAEA,cAAI,UAAU;AACZ,iBAAK,wBAAwB,MAAM;AACnC,uBAAW;AAAA,UACb;AAAA,QACF;AAAA,MACF;AAEA,UAAI,SAAU;AAAA,IAChB;AAGA,QAAI,CAAC,KAAK,gBAAgB,SAAS,KAAK,IAAK,eAAe,UAAU,MAAM;AAC1E,WAAK,QAAQ,MAAM,yCAAyC;AAC5D,WAAK,IAAK,KAAK,UAAU;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,MAAM,YAAY;AAChB,QAAI,CAAC,KAAK,IAAK;AAEf,WAAO,IAAI,QAAc,CAAC,YAAY;AACpC,UAAI,CAAC,KAAK,IAAK,QAAO,QAAQ;AAE9B,WAAK,IAAI,GAAG,WAAW,CAAC,MAAc,aAAsB;AAC1D,YAAI,UAAU;AACZ,eAAK,QAAQ,KAAK,kDAAkD;AACpE;AAAA,QACF;AACA,YAAI;AACF,gBAAM,MAAM,KAAK,MAAM,KAAK,SAAS,CAAC;AACtC,eAAK,oBAAoB,GAAG;AAAA,QAC9B,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,oCAAoC,EAAE,MAAM,CAAC;AAAA,QAClE;AAAA,MACF,CAAC;AAED,WAAK,IAAI,GAAG,SAAS,CAAC,MAAM,WAAW;AACrC,aAAK,QAAQ,MAAM,8BAA8B,IAAI,IAAI,MAAM,EAAE;AACjE,gBAAQ;AAAA,MACV,CAAC;AAGD,WAAK,IAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAAA,IACtC,CAAC;AAAA,EACH;AAAA,EAEA,oBAAoB,MAA+B;AACjD,QAAI,KAAK,YAAY;AACnB,WAAK,aAAa,KAAK;AAAA,IACzB;AAEA,QAAI,KAAK,SAAS,YAAY;AAC5B,YAAM,YAAY,KAAK;AAEvB,UAAI,cAAc,eAAe;AAC/B,YAAI,KAAK,UAAW;AAEpB,aAAK,YAAY;AACjB,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,IAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAED,aAAK,qBAAqB,IAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,UAAU;AACjC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,IAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,kBAAkB;AACzC,YAAI,CAAC,KAAK,UAAW;AACrB,aAAK,qBAAqB,IAAI,gBAAgB,sBAAsB,IAAI;AAAA,MAC1E,WAAW,cAAc,eAAe;AACtC,aAAK,qBAAqB,IAAI,gBAAgB,oBAAoB,IAAI;AAAA,MACxE,WAAW,cAAc,aAAa;AACpC,YAAI,CAAC,KAAK,UAAW;AAErB,aAAK,YAAY;AACjB,aAAK,qBAAqB,IAAI,gBAAgB,kBAAkB,IAAI;AAEpE,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,IAAI,gBAAgB;AAAA,UAC1B,WAAW,KAAK;AAAA,QAClB,CAAC;AAAA,MACH;AAAA,IACF,WAAW,KAAK,SAAS,SAAS;AAChC,WAAK,QAAQ,KAAK,0BAA0B,EAAE,KAAK,CAAC;AACpD,YAAM,OAAQ,KAAK,eAA0B;AAC7C,YAAM,IAAI,MAAM,uBAAuB,IAAI,EAAE;AAAA,IAC/C;AAAA,EACF;AAAA,EAEA,qBAAqB,WAAgC,MAA+B;AAClF,UAAM,OAAO,mBAAmB,KAAK,MAAM,YAAY,MAAM,MAAM,KAAK,eAAe;AAEvF,QAAI,KAAK,SAAS,GAAG;AACnB,WAAK,MAAM,IAAI;AAAA,QACb,MAAM;AAAA,QACN,WAAW,KAAK;AAAA,QAChB,cAAc,CAAC,KAAK,CAAC,GAAI,GAAG,KAAK,MAAM,CAAC,CAAC;AAAA,MAC3C,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,uBAAuB,UAAkB;AACvC,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AAAA,EAEA,kBAA0B;AACxB,UAAM,SAA4C;AAAA,MAChD,OAAO,KAAK,MAAM;AAAA,MAClB,aAAa,KAAK,MAAM,WAAW,SAAS;AAAA,MAC5C,UAAU;AAAA,MACV,aAAa,OAAO,KAAK,MAAM,SAAS;AAAA,IAC1C;AAGA,QAAI,KAAK,MAAM;AACb,aAAO,sBAAsB,KAAK,MAAM,kBAAkB,SAAS;AACrE,QAAI,KAAK,MAAM,aAAc,QAAO,gBAAgB,KAAK,MAAM,aAAa,SAAS;AACrF,QAAI,KAAK,MAAM,aAAc,QAAO,iBAAiB,KAAK,MAAM,aAAa,SAAS;AAEtF,QAAI,KAAK,MAAM,SAAS,SAAS,EAAG,QAAO,UAAU,KAAK,MAAM;AAChE,QAAI,KAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,SAAS,EAAG,QAAO,MAAM,KAAK,MAAM;AAE3E,UAAM,UAAU,KAAK,MAAM,YAAY,QAAQ,SAAS,IAAI;AAC5D,UAAM,KAAK,YAAY,UAAU,MAAM;AACvC,WAAO,GAAG,OAAO,IAAI,EAAE;AAAA,EACzB;AAAA,EAES,QAAQ;AApcnB;AAqcI,UAAM,MAAM;AACZ,eAAK,QAAL,mBAAU;AAAA,EACZ;AACF;AAIA,SAAS,mBACP,UACA,MACA,iBACkB;AAClB,QAAM,aAAa,KAAK;AACxB,QAAM,YAAa,KAAK,SAA4C,CAAC;AAErE,MAAI,CAAC,aAAa,UAAU,WAAW,GAAG;AACxC,WAAO,CAAC;AAAA,EACV;AAEA,MAAI,aAAa;AACjB,MAAI,UAAU,SAAS,GAAG;AACxB,UAAM,MAAM,UAAU,OAAO,CAAC,KAAa,MAAM,OAAQ,EAAE,cAAyB,IAAI,CAAC;AACzF,iBAAa,MAAM,UAAU;AAAA,EAC/B;AAEA,QAAM,KAAqB;AAAA,IACzB;AAAA,IACA,YAAa,KAAK,sBAAiC,KAAK;AAAA,IACxD,UAAW,KAAK,oBAA+B,KAAK;AAAA,IACpD;AAAA,IACA,MAAM,cAAc;AAAA;AAAA;AAAA;AAAA,IAIpB,OAAO,UAAU,IAAI,CAAC,UAAU;AAAA,MAC9B,MAAO,KAAK,QAAmB;AAAA,MAC/B,YAAa,KAAK,SAAoB,KAAK;AAAA,MAC3C,UAAW,KAAK,OAAkB,KAAK;AAAA,MACvC,YAAa,KAAK,cAAyB;AAAA,MAC3C;AAAA,IACF,EAAE;AAAA,EACJ;AAEA,SAAO,CAAC,EAAE;AACZ;","names":["_a"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-deepgram",
3
- "version": "1.0.37",
3
+ "version": "1.0.38",
4
4
  "description": "Deepgram plugin for LiveKit Agents for Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -30,16 +30,16 @@
30
30
  "@types/ws": "^8.5.10",
31
31
  "tsup": "^8.3.5",
32
32
  "typescript": "^5.0.0",
33
- "@livekit/agents": "1.0.37",
34
- "@livekit/agents-plugin-silero": "1.0.37",
35
- "@livekit/agents-plugins-test": "1.0.37"
33
+ "@livekit/agents": "1.0.38",
34
+ "@livekit/agents-plugin-silero": "1.0.38",
35
+ "@livekit/agents-plugins-test": "1.0.38"
36
36
  },
37
37
  "dependencies": {
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
41
  "@livekit/rtc-node": "^0.13.24",
42
- "@livekit/agents": "1.0.37"
42
+ "@livekit/agents": "1.0.38"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/stt.ts CHANGED
@@ -73,6 +73,7 @@ export class STT extends stt.STT {
73
73
  super({
74
74
  streaming: true,
75
75
  interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,
76
+ alignedTranscript: 'word',
76
77
  });
77
78
  if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {
78
79
  throw new Error(
@@ -339,7 +340,11 @@ export class SpeechStream extends stt.SpeechStream {
339
340
  const isEndpoint = json['speech_final'];
340
341
  this.#requestId = requestId;
341
342
 
342
- const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);
343
+ const alternatives = liveTranscriptionToSpeechData(
344
+ this.#opts.language!,
345
+ json,
346
+ this.startTimeOffset,
347
+ );
343
348
 
344
349
  // If, for some reason, we didn't get a SpeechStarted event but we got
345
350
  // a transcript with text, we should start speaking. It's rare but has
@@ -421,14 +426,28 @@ export class SpeechStream extends stt.SpeechStream {
421
426
  const liveTranscriptionToSpeechData = (
422
427
  language: STTLanguages | string,
423
428
  data: { [id: string]: any },
429
+ startTimeOffset: number = 0,
424
430
  ): stt.SpeechData[] => {
425
431
  const alts: any[] = data['channel']['alternatives'];
426
432
 
427
- return alts.map((alt) => ({
428
- language,
429
- startTime: alt['words'].length ? alt['words'][0]['start'] : 0,
430
- endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,
431
- confidence: alt['confidence'],
432
- text: alt['transcript'],
433
- }));
433
+ return alts.map((alt) => {
434
+ const wordsData: any[] = alt['words'] ?? [];
435
+
436
+ return {
437
+ language,
438
+ startTime: wordsData.length ? wordsData[0]['start'] + startTimeOffset : startTimeOffset,
439
+ endTime: wordsData.length
440
+ ? wordsData[wordsData.length - 1]['end'] + startTimeOffset
441
+ : startTimeOffset,
442
+ confidence: alt['confidence'],
443
+ text: alt['transcript'],
444
+ words: wordsData.map((word) => ({
445
+ text: word['word'] ?? '',
446
+ startTime: (word['start'] ?? 0) + startTimeOffset,
447
+ endTime: (word['end'] ?? 0) + startTimeOffset,
448
+ confidence: word['confidence'] ?? 0.0,
449
+ startTimeOffset,
450
+ })),
451
+ };
452
+ });
434
453
  };
package/src/stt_v2.ts CHANGED
@@ -109,6 +109,7 @@ export class STTv2 extends stt.STT {
109
109
  super({
110
110
  streaming: true,
111
111
  interimResults: true,
112
+ alignedTranscript: 'word',
112
113
  });
113
114
 
114
115
  this.#opts = { ...defaultSTTv2Options, ...opts };
@@ -405,9 +406,7 @@ class SpeechStreamv2 extends stt.SpeechStream {
405
406
  }
406
407
 
407
408
  #sendTranscriptEvent(eventType: stt.SpeechEventType, data: Record<string, unknown>) {
408
- // Note: start_time_offset is not yet available in the TypeScript base class
409
- // Using 0.0 for now - full parity would require base class changes
410
- const alts = parseTranscription(this.#opts.language || 'en', data, 0.0);
409
+ const alts = parseTranscription(this.#opts.language || 'en', data, this.startTimeOffset);
411
410
 
412
411
  if (alts.length > 0) {
413
412
  this.queue.put({
@@ -437,7 +436,7 @@ class SpeechStreamv2 extends stt.SpeechStream {
437
436
  mip_opt_out: String(this.#opts.mipOptOut),
438
437
  };
439
438
 
440
- if (this.#opts.language) params.language = this.#opts.language;
439
+ // Note: v2 API does NOT include 'language' parameter
441
440
  if (this.#opts.eagerEotThreshold)
442
441
  params.eager_eot_threshold = this.#opts.eagerEotThreshold.toString();
443
442
  if (this.#opts.eotThreshold) params.eot_threshold = this.#opts.eotThreshold.toString();
@@ -465,16 +464,16 @@ function parseTranscription(
465
464
  startTimeOffset: number,
466
465
  ): stt.SpeechData[] {
467
466
  const transcript = data.transcript as string | undefined;
468
- const words = (data.words as Array<Record<string, unknown>>) || [];
467
+ const wordsData = (data.words as Array<Record<string, unknown>>) || [];
469
468
 
470
- if (!words || words.length === 0) {
469
+ if (!wordsData || wordsData.length === 0) {
471
470
  return [];
472
471
  }
473
472
 
474
473
  let confidence = 0;
475
- if (words.length > 0) {
476
- const sum = words.reduce((acc: number, w) => acc + ((w.confidence as number) || 0), 0);
477
- confidence = sum / words.length;
474
+ if (wordsData.length > 0) {
475
+ const sum = wordsData.reduce((acc: number, w) => acc + ((w.confidence as number) || 0), 0);
476
+ confidence = sum / wordsData.length;
478
477
  }
479
478
 
480
479
  const sd: stt.SpeechData = {
@@ -483,6 +482,16 @@ function parseTranscription(
483
482
  endTime: ((data.audio_window_end as number) || 0) + startTimeOffset,
484
483
  confidence: confidence,
485
484
  text: transcript || '',
485
+ // Note: Deepgram V2 (Flux) API does not provide word-level timing (start/end).
486
+ // Words only contain 'word' and 'confidence' fields, so startTime/endTime will be 0.
487
+ // See: https://developers.deepgram.com/docs/flux/nova-3-migration
488
+ words: wordsData.map((word) => ({
489
+ text: (word.word as string) ?? '',
490
+ startTime: ((word.start as number) ?? 0) + startTimeOffset,
491
+ endTime: ((word.end as number) ?? 0) + startTimeOffset,
492
+ confidence: (word.confidence as number) ?? 0.0,
493
+ startTimeOffset,
494
+ })),
486
495
  };
487
496
 
488
497
  return [sd];