@livekit/agents-plugin-deepgram 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/stt.cjs CHANGED
@@ -50,6 +50,7 @@ class STT extends import_agents.stt.STT {
50
50
  #opts;
51
51
  #logger = (0, import_agents.log)();
52
52
  label = "deepgram.STT";
53
+ abortController = new AbortController();
53
54
  constructor(opts = defaultSTTOptions) {
54
55
  super({
55
56
  streaming: true,
@@ -89,20 +90,16 @@ class STT extends import_agents.stt.STT {
89
90
  this.#opts = { ...this.#opts, ...opts };
90
91
  }
91
92
  stream() {
92
- return new SpeechStream(this, this.#opts);
93
+ return new SpeechStream(this, this.#opts, this.abortController);
94
+ }
95
+ async close() {
96
+ this.abortController.abort();
93
97
  }
94
98
  }
95
99
  class SpeechStream extends import_agents.stt.SpeechStream {
96
- #opts;
97
- #audioEnergyFilter;
98
- #logger = (0, import_agents.log)();
99
- #speaking = false;
100
- #resetWS = new import_agents.Future();
101
- #requestId = "";
102
- #audioDurationCollector;
103
- label = "deepgram.SpeechStream";
104
- constructor(stt2, opts) {
100
+ constructor(stt2, opts, abortController) {
105
101
  super(stt2, opts.sampleRate);
102
+ this.abortController = abortController;
106
103
  this.#opts = opts;
107
104
  this.closed = false;
108
105
  this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
@@ -111,11 +108,19 @@ class SpeechStream extends import_agents.stt.SpeechStream {
111
108
  { duration: 5 }
112
109
  );
113
110
  }
111
+ #opts;
112
+ #audioEnergyFilter;
113
+ #logger = (0, import_agents.log)();
114
+ #speaking = false;
115
+ #resetWS = new import_agents.Future();
116
+ #requestId = "";
117
+ #audioDurationCollector;
118
+ label = "deepgram.SpeechStream";
114
119
  async run() {
115
120
  const maxRetry = 32;
116
121
  let retries = 0;
117
122
  let ws;
118
- while (!this.input.closed) {
123
+ while (!this.input.closed && !this.closed) {
119
124
  const streamURL = new URL(API_BASE_URL_V1);
120
125
  const params = {
121
126
  model: this.#opts.model,
@@ -157,15 +162,21 @@ class SpeechStream extends import_agents.stt.SpeechStream {
157
162
  });
158
163
  await this.#runWS(ws);
159
164
  } catch (e) {
160
- if (retries >= maxRetry) {
161
- throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
165
+ if (!this.closed && !this.input.closed) {
166
+ if (retries >= maxRetry) {
167
+ throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
168
+ }
169
+ const delay = Math.min(retries * 5, 10);
170
+ retries++;
171
+ this.#logger.warn(
172
+ `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
173
+ );
174
+ await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
175
+ } else {
176
+ this.#logger.warn(
177
+ `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`
178
+ );
162
179
  }
163
- const delay = Math.min(retries * 5, 10);
164
- retries++;
165
- this.#logger.warn(
166
- `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
167
- );
168
- await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
169
180
  }
170
181
  }
171
182
  this.closed = true;
@@ -185,6 +196,17 @@ class SpeechStream extends import_agents.stt.SpeechStream {
185
196
  return;
186
197
  }
187
198
  }, 5e3);
199
+ const wsMonitor = import_agents.Task.from(async (controller) => {
200
+ const closed = new Promise(async (_, reject) => {
201
+ ws.once("close", (code, reason) => {
202
+ if (!closing) {
203
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
204
+ reject(new Error("WebSocket closed"));
205
+ }
206
+ });
207
+ });
208
+ await Promise.race([closed, (0, import_agents.waitForAbort)(controller.signal)]);
209
+ });
188
210
  const sendTask = async () => {
189
211
  const samples100Ms = Math.floor(this.#opts.sampleRate / 10);
190
212
  const stream = new import_agents.AudioByteStream(
@@ -192,41 +214,44 @@ class SpeechStream extends import_agents.stt.SpeechStream {
192
214
  this.#opts.numChannels,
193
215
  samples100Ms
194
216
  );
195
- for await (const data of this.input) {
196
- let frames;
197
- if (data === SpeechStream.FLUSH_SENTINEL) {
198
- frames = stream.flush();
199
- this.#audioDurationCollector.flush();
200
- } else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
201
- frames = stream.write(data.data.buffer);
202
- } else {
203
- throw new Error(`sample rate or channel count of frame does not match`);
204
- }
205
- for await (const frame of frames) {
206
- if (this.#audioEnergyFilter.pushFrame(frame)) {
207
- const frameDuration = frame.samplesPerChannel / frame.sampleRate;
208
- this.#audioDurationCollector.push(frameDuration);
209
- ws.send(frame.data.buffer);
217
+ try {
218
+ while (!this.closed) {
219
+ const result = await Promise.race([
220
+ this.input.next(),
221
+ (0, import_agents.waitForAbort)(this.abortController.signal)
222
+ ]);
223
+ if (result === void 0) return;
224
+ if (result.done) {
225
+ break;
226
+ }
227
+ const data = result.value;
228
+ let frames;
229
+ if (data === SpeechStream.FLUSH_SENTINEL) {
230
+ frames = stream.flush();
231
+ this.#audioDurationCollector.flush();
232
+ } else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
233
+ frames = stream.write(data.data.buffer);
234
+ } else {
235
+ throw new Error(`sample rate or channel count of frame does not match`);
236
+ }
237
+ for await (const frame of frames) {
238
+ if (this.#audioEnergyFilter.pushFrame(frame)) {
239
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
240
+ this.#audioDurationCollector.push(frameDuration);
241
+ ws.send(frame.data.buffer);
242
+ }
210
243
  }
211
244
  }
245
+ } finally {
246
+ closing = true;
247
+ ws.send(JSON.stringify({ type: "CloseStream" }));
248
+ wsMonitor.cancel();
212
249
  }
213
- closing = true;
214
- ws.send(JSON.stringify({ type: "CloseStream" }));
215
250
  };
216
- const wsMonitor = new Promise(
217
- (_, reject) => ws.once("close", (code, reason) => {
218
- if (!closing) {
219
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
220
- reject(new Error("WebSocket closed"));
221
- }
222
- })
223
- );
224
- const listenTask = async () => {
225
- while (!this.closed && !closing) {
226
- try {
227
- await new Promise((resolve) => {
228
- ws.once("message", (data) => resolve(data));
229
- }).then((msg) => {
251
+ const listenTask = import_agents.Task.from(async (controller) => {
252
+ const listenMessage = new Promise((resolve, reject) => {
253
+ ws.on("message", (msg) => {
254
+ try {
230
255
  const json = JSON.parse(msg.toString());
231
256
  switch (json["type"]) {
232
257
  case "SpeechStarted": {
@@ -248,7 +273,9 @@ class SpeechStream extends import_agents.stt.SpeechStream {
248
273
  if (alternatives[0] && alternatives[0].text) {
249
274
  if (!this.#speaking) {
250
275
  this.#speaking = true;
251
- this.queue.put({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
276
+ this.queue.put({
277
+ type: import_agents.stt.SpeechEventType.START_OF_SPEECH
278
+ });
252
279
  }
253
280
  if (isFinal) {
254
281
  this.queue.put({
@@ -276,14 +303,21 @@ class SpeechStream extends import_agents.stt.SpeechStream {
276
303
  break;
277
304
  }
278
305
  }
279
- });
280
- } catch (error) {
281
- this.#logger.child({ error }).warn("unrecoverable error, exiting");
282
- break;
283
- }
284
- }
285
- };
286
- await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);
306
+ if (this.closed || closing) {
307
+ resolve();
308
+ }
309
+ } catch (err) {
310
+ this.#logger.error(`STT: Error processing message: ${msg}`);
311
+ reject(err);
312
+ }
313
+ });
314
+ });
315
+ await Promise.race([listenMessage, (0, import_agents.waitForAbort)(controller.signal)]);
316
+ }, this.abortController);
317
+ await Promise.race([
318
+ this.#resetWS.await,
319
+ Promise.all([sendTask(), listenTask.result, wsMonitor])
320
+ ]);
287
321
  closing = true;
288
322
  ws.close();
289
323
  clearInterval(keepalive);
package/dist/stt.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const wsMonitor = new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n }),\n );\n\n const listenTask = async () => {\n while (!this.closed && !closing) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,gBAAwC;AACxC,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,MAAK,KAAK,UAAU;AAC1B,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AACtB,eAAK,wBAAwB,MAAM;AAAA,QACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,kBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,iBAAK,wBAAwB,KAAK,aAAa;AAC/C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,YAAY,IAAI;AAAA,MAAc,CAAC,GAAG,WACtC,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,iBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,QACtC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,UAAU,CAAC,SAAS;AAC/B,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,CAAC,KAAK,SAAS,OAAO,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC;AAC5F,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBASO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,gBAChB,4BAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
package/dist/stt.d.cts CHANGED
@@ -23,15 +23,18 @@ export interface STTOptions {
23
23
  export declare class STT extends stt.STT {
24
24
  #private;
25
25
  label: string;
26
+ private abortController;
26
27
  constructor(opts?: Partial<STTOptions>);
27
28
  _recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
28
29
  updateOptions(opts: Partial<STTOptions>): void;
29
30
  stream(): SpeechStream;
31
+ close(): Promise<void>;
30
32
  }
31
33
  export declare class SpeechStream extends stt.SpeechStream {
32
34
  #private;
35
+ private abortController;
33
36
  label: string;
34
- constructor(stt: STT, opts: STTOptions);
37
+ constructor(stt: STT, opts: STTOptions, abortController: AbortController);
35
38
  protected run(): Promise<void>;
36
39
  updateOptions(opts: Partial<STTOptions>): void;
37
40
  private onAudioDurationReport;
package/dist/stt.d.ts CHANGED
@@ -23,15 +23,18 @@ export interface STTOptions {
23
23
  export declare class STT extends stt.STT {
24
24
  #private;
25
25
  label: string;
26
+ private abortController;
26
27
  constructor(opts?: Partial<STTOptions>);
27
28
  _recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
28
29
  updateOptions(opts: Partial<STTOptions>): void;
29
30
  stream(): SpeechStream;
31
+ close(): Promise<void>;
30
32
  }
31
33
  export declare class SpeechStream extends stt.SpeechStream {
32
34
  #private;
35
+ private abortController;
33
36
  label: string;
34
- constructor(stt: STT, opts: STTOptions);
37
+ constructor(stt: STT, opts: STTOptions, abortController: AbortController);
35
38
  protected run(): Promise<void>;
36
39
  updateOptions(opts: Partial<STTOptions>): void;
37
40
  private onAudioDurationReport;
package/dist/stt.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAKhB,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;CAGvB;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAQhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;cAWtB,GAAG;IAkEnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAkJvC,OAAO,CAAC,qBAAqB;CAU9B"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;IAIhB,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAa9C,OAAO,CAAC,eAAe;IALzB,KAAK,SAA2B;gBAG9B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,UAAU,EACR,eAAe,EAAE,eAAe;cAY1B,GAAG;IAyEnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IA+KvC,OAAO,CAAC,qBAAqB;CAU9B"}
package/dist/stt.js CHANGED
@@ -2,8 +2,10 @@ import {
2
2
  AudioByteStream,
3
3
  AudioEnergyFilter,
4
4
  Future,
5
+ Task,
5
6
  log,
6
- stt
7
+ stt,
8
+ waitForAbort
7
9
  } from "@livekit/agents";
8
10
  import { WebSocket } from "ws";
9
11
  import { PeriodicCollector } from "./_utils.js";
@@ -32,6 +34,7 @@ class STT extends stt.STT {
32
34
  #opts;
33
35
  #logger = log();
34
36
  label = "deepgram.STT";
37
+ abortController = new AbortController();
35
38
  constructor(opts = defaultSTTOptions) {
36
39
  super({
37
40
  streaming: true,
@@ -71,20 +74,16 @@ class STT extends stt.STT {
71
74
  this.#opts = { ...this.#opts, ...opts };
72
75
  }
73
76
  stream() {
74
- return new SpeechStream(this, this.#opts);
77
+ return new SpeechStream(this, this.#opts, this.abortController);
78
+ }
79
+ async close() {
80
+ this.abortController.abort();
75
81
  }
76
82
  }
77
83
  class SpeechStream extends stt.SpeechStream {
78
- #opts;
79
- #audioEnergyFilter;
80
- #logger = log();
81
- #speaking = false;
82
- #resetWS = new Future();
83
- #requestId = "";
84
- #audioDurationCollector;
85
- label = "deepgram.SpeechStream";
86
- constructor(stt2, opts) {
84
+ constructor(stt2, opts, abortController) {
87
85
  super(stt2, opts.sampleRate);
86
+ this.abortController = abortController;
88
87
  this.#opts = opts;
89
88
  this.closed = false;
90
89
  this.#audioEnergyFilter = new AudioEnergyFilter();
@@ -93,11 +92,19 @@ class SpeechStream extends stt.SpeechStream {
93
92
  { duration: 5 }
94
93
  );
95
94
  }
95
+ #opts;
96
+ #audioEnergyFilter;
97
+ #logger = log();
98
+ #speaking = false;
99
+ #resetWS = new Future();
100
+ #requestId = "";
101
+ #audioDurationCollector;
102
+ label = "deepgram.SpeechStream";
96
103
  async run() {
97
104
  const maxRetry = 32;
98
105
  let retries = 0;
99
106
  let ws;
100
- while (!this.input.closed) {
107
+ while (!this.input.closed && !this.closed) {
101
108
  const streamURL = new URL(API_BASE_URL_V1);
102
109
  const params = {
103
110
  model: this.#opts.model,
@@ -139,15 +146,21 @@ class SpeechStream extends stt.SpeechStream {
139
146
  });
140
147
  await this.#runWS(ws);
141
148
  } catch (e) {
142
- if (retries >= maxRetry) {
143
- throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
149
+ if (!this.closed && !this.input.closed) {
150
+ if (retries >= maxRetry) {
151
+ throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
152
+ }
153
+ const delay = Math.min(retries * 5, 10);
154
+ retries++;
155
+ this.#logger.warn(
156
+ `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
157
+ );
158
+ await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
159
+ } else {
160
+ this.#logger.warn(
161
+ `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`
162
+ );
144
163
  }
145
- const delay = Math.min(retries * 5, 10);
146
- retries++;
147
- this.#logger.warn(
148
- `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`
149
- );
150
- await new Promise((resolve) => setTimeout(resolve, delay * 1e3));
151
164
  }
152
165
  }
153
166
  this.closed = true;
@@ -167,6 +180,17 @@ class SpeechStream extends stt.SpeechStream {
167
180
  return;
168
181
  }
169
182
  }, 5e3);
183
+ const wsMonitor = Task.from(async (controller) => {
184
+ const closed = new Promise(async (_, reject) => {
185
+ ws.once("close", (code, reason) => {
186
+ if (!closing) {
187
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
188
+ reject(new Error("WebSocket closed"));
189
+ }
190
+ });
191
+ });
192
+ await Promise.race([closed, waitForAbort(controller.signal)]);
193
+ });
170
194
  const sendTask = async () => {
171
195
  const samples100Ms = Math.floor(this.#opts.sampleRate / 10);
172
196
  const stream = new AudioByteStream(
@@ -174,41 +198,44 @@ class SpeechStream extends stt.SpeechStream {
174
198
  this.#opts.numChannels,
175
199
  samples100Ms
176
200
  );
177
- for await (const data of this.input) {
178
- let frames;
179
- if (data === SpeechStream.FLUSH_SENTINEL) {
180
- frames = stream.flush();
181
- this.#audioDurationCollector.flush();
182
- } else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
183
- frames = stream.write(data.data.buffer);
184
- } else {
185
- throw new Error(`sample rate or channel count of frame does not match`);
186
- }
187
- for await (const frame of frames) {
188
- if (this.#audioEnergyFilter.pushFrame(frame)) {
189
- const frameDuration = frame.samplesPerChannel / frame.sampleRate;
190
- this.#audioDurationCollector.push(frameDuration);
191
- ws.send(frame.data.buffer);
201
+ try {
202
+ while (!this.closed) {
203
+ const result = await Promise.race([
204
+ this.input.next(),
205
+ waitForAbort(this.abortController.signal)
206
+ ]);
207
+ if (result === void 0) return;
208
+ if (result.done) {
209
+ break;
210
+ }
211
+ const data = result.value;
212
+ let frames;
213
+ if (data === SpeechStream.FLUSH_SENTINEL) {
214
+ frames = stream.flush();
215
+ this.#audioDurationCollector.flush();
216
+ } else if (data.sampleRate === this.#opts.sampleRate || data.channels === this.#opts.numChannels) {
217
+ frames = stream.write(data.data.buffer);
218
+ } else {
219
+ throw new Error(`sample rate or channel count of frame does not match`);
220
+ }
221
+ for await (const frame of frames) {
222
+ if (this.#audioEnergyFilter.pushFrame(frame)) {
223
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
224
+ this.#audioDurationCollector.push(frameDuration);
225
+ ws.send(frame.data.buffer);
226
+ }
192
227
  }
193
228
  }
229
+ } finally {
230
+ closing = true;
231
+ ws.send(JSON.stringify({ type: "CloseStream" }));
232
+ wsMonitor.cancel();
194
233
  }
195
- closing = true;
196
- ws.send(JSON.stringify({ type: "CloseStream" }));
197
234
  };
198
- const wsMonitor = new Promise(
199
- (_, reject) => ws.once("close", (code, reason) => {
200
- if (!closing) {
201
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
202
- reject(new Error("WebSocket closed"));
203
- }
204
- })
205
- );
206
- const listenTask = async () => {
207
- while (!this.closed && !closing) {
208
- try {
209
- await new Promise((resolve) => {
210
- ws.once("message", (data) => resolve(data));
211
- }).then((msg) => {
235
+ const listenTask = Task.from(async (controller) => {
236
+ const listenMessage = new Promise((resolve, reject) => {
237
+ ws.on("message", (msg) => {
238
+ try {
212
239
  const json = JSON.parse(msg.toString());
213
240
  switch (json["type"]) {
214
241
  case "SpeechStarted": {
@@ -230,7 +257,9 @@ class SpeechStream extends stt.SpeechStream {
230
257
  if (alternatives[0] && alternatives[0].text) {
231
258
  if (!this.#speaking) {
232
259
  this.#speaking = true;
233
- this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
260
+ this.queue.put({
261
+ type: stt.SpeechEventType.START_OF_SPEECH
262
+ });
234
263
  }
235
264
  if (isFinal) {
236
265
  this.queue.put({
@@ -258,14 +287,21 @@ class SpeechStream extends stt.SpeechStream {
258
287
  break;
259
288
  }
260
289
  }
261
- });
262
- } catch (error) {
263
- this.#logger.child({ error }).warn("unrecoverable error, exiting");
264
- break;
265
- }
266
- }
267
- };
268
- await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);
290
+ if (this.closed || closing) {
291
+ resolve();
292
+ }
293
+ } catch (err) {
294
+ this.#logger.error(`STT: Error processing message: ${msg}`);
295
+ reject(err);
296
+ }
297
+ });
298
+ });
299
+ await Promise.race([listenMessage, waitForAbort(controller.signal)]);
300
+ }, this.abortController);
301
+ await Promise.race([
302
+ this.#resetWS.await,
303
+ Promise.all([sendTask(), listenTask.result, wsMonitor])
304
+ ]);
269
305
  closing = true;
270
306
  ws.close();
271
307
  clearInterval(keepalive);
package/dist/stt.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n log,\n stt,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { type RawData, WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts);\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n while (!this.input.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n for await (const data of this.input) {\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n };\n\n const wsMonitor = new Promise<void>((_, reject) =>\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n }),\n );\n\n const listenTask = async () => {\n while (!this.closed && !closing) {\n try {\n await new Promise<RawData>((resolve) => {\n ws.once('message', (data) => resolve(data));\n }).then((msg) => {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n });\n } catch (error) {\n this.#logger.child({ error }).warn('unrecoverable error, exiting');\n break;\n }\n }\n };\n\n await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAuB,iBAAiB;AACxC,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EAER,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,KAAK;AAAA,EAC1C;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB;AACtC,UAAMA,MAAK,KAAK,UAAU;AAC1B,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AACJ,WAAO,CAAC,KAAK,MAAM,QAAQ;AACzB,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,WAAW,UAAU;AACvB,gBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,QACjF;AAEA,cAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,aAAK,QAAQ;AAAA,UACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,QAC3F;AACA,cAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,MAClE;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAEP,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI;AACJ,YAAI,SAAS,aAAa,gBAAgB;AACxC,mBAAS,OAAO,MAAM;AACtB,eAAK,wBAAwB,MAAM;AAAA,QACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,mBAAS,OAAO,MAAM,KAAK,KAAK,MAAM;AAAA,QACxC,OAAO;AACL,gBAAM,IAAI,MAAM,sDAAsD;AAAA,QACxE;AAEA,yBAAiB,SAAS,QAAQ;AAChC,cAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,kBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,iBAAK,wBAAwB,KAAK,aAAa;AAC/C,eAAG,KAAK,MAAM,KAAK,MAAM;AAAA,UAC3B;AAAA,QACF;AAAA,MACF;AAEA,gBAAU;AACV,SAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAAA,IACjD;AAEA,UAAM,YAAY,IAAI;AAAA,MAAc,CAAC,GAAG,WACtC,GAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,YAAI,CAAC,SAAS;AACZ,eAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,iBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,QACtC;AAAA,MACF,CAAC;AAAA,IACH;AAEA,UAAM,aAAa,YAAY;AAC7B,aAAO,CAAC,KAAK,UAAU,CAAC,SAAS;AAC/B,YAAI;AACF,gBAAM,IAAI,QAAiB,CAAC,YAAY;AACtC,eAAG,KAAK,WAAW,CAAC,SAAS,QAAQ,IAAI,CAAC;AAAA,UAC5C,CAAC,EAAE,KAAK,CAAC,QAAQ;AACf,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAAA,UACF,CAAC;AAAA,QACH,SAAS,OAAO;AACd,eAAK,QAAQ,MAAM,EAAE,MAAM,CAAC,EAAE,KAAK,8BAA8B;AACjE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,QAAQ,KAAK,CAAC,KAAK,SAAS,OAAO,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC;AAC5F,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,YAChB,aAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,qBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAC5D;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-deepgram",
3
- "version": "1.0.18",
3
+ "version": "1.0.20",
4
4
  "description": "Deepgram plugin for LiveKit Agents for Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -30,16 +30,16 @@
30
30
  "@types/ws": "^8.5.10",
31
31
  "tsup": "^8.3.5",
32
32
  "typescript": "^5.0.0",
33
- "@livekit/agents": "1.0.18",
34
- "@livekit/agents-plugin-silero": "1.0.18",
35
- "@livekit/agents-plugins-test": "1.0.18"
33
+ "@livekit/agents": "1.0.20",
34
+ "@livekit/agents-plugins-test": "1.0.20",
35
+ "@livekit/agents-plugin-silero": "1.0.20"
36
36
  },
37
37
  "dependencies": {
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
41
  "@livekit/rtc-node": "^0.13.12",
42
- "@livekit/agents": "1.0.18"
42
+ "@livekit/agents": "1.0.20"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/stt.ts CHANGED
@@ -6,11 +6,13 @@ import {
6
6
  AudioByteStream,
7
7
  AudioEnergyFilter,
8
8
  Future,
9
+ Task,
9
10
  log,
10
11
  stt,
12
+ waitForAbort,
11
13
  } from '@livekit/agents';
12
14
  import type { AudioFrame } from '@livekit/rtc-node';
13
- import { type RawData, WebSocket } from 'ws';
15
+ import { WebSocket } from 'ws';
14
16
  import { PeriodicCollector } from './_utils.js';
15
17
  import type { STTLanguages, STTModels } from './models.js';
16
18
 
@@ -62,6 +64,7 @@ export class STT extends stt.STT {
62
64
  #opts: STTOptions;
63
65
  #logger = log();
64
66
  label = 'deepgram.STT';
67
+ private abortController = new AbortController();
65
68
 
66
69
  constructor(opts: Partial<STTOptions> = defaultSTTOptions) {
67
70
  super({
@@ -111,7 +114,11 @@ export class STT extends stt.STT {
111
114
  }
112
115
 
113
116
  stream(): SpeechStream {
114
- return new SpeechStream(this, this.#opts);
117
+ return new SpeechStream(this, this.#opts, this.abortController);
118
+ }
119
+
120
+ async close() {
121
+ this.abortController.abort();
115
122
  }
116
123
  }
117
124
 
@@ -125,7 +132,11 @@ export class SpeechStream extends stt.SpeechStream {
125
132
  #audioDurationCollector: PeriodicCollector<number>;
126
133
  label = 'deepgram.SpeechStream';
127
134
 
128
- constructor(stt: STT, opts: STTOptions) {
135
+ constructor(
136
+ stt: STT,
137
+ opts: STTOptions,
138
+ private abortController: AbortController,
139
+ ) {
129
140
  super(stt, opts.sampleRate);
130
141
  this.#opts = opts;
131
142
  this.closed = false;
@@ -140,7 +151,8 @@ export class SpeechStream extends stt.SpeechStream {
140
151
  const maxRetry = 32;
141
152
  let retries = 0;
142
153
  let ws: WebSocket;
143
- while (!this.input.closed) {
154
+
155
+ while (!this.input.closed && !this.closed) {
144
156
  const streamURL = new URL(API_BASE_URL_V1);
145
157
  const params = {
146
158
  model: this.#opts.model,
@@ -185,17 +197,23 @@ export class SpeechStream extends stt.SpeechStream {
185
197
 
186
198
  await this.#runWS(ws);
187
199
  } catch (e) {
188
- if (retries >= maxRetry) {
189
- throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
190
- }
200
+ if (!this.closed && !this.input.closed) {
201
+ if (retries >= maxRetry) {
202
+ throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);
203
+ }
191
204
 
192
- const delay = Math.min(retries * 5, 10);
193
- retries++;
205
+ const delay = Math.min(retries * 5, 10);
206
+ retries++;
194
207
 
195
- this.#logger.warn(
196
- `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,
197
- );
198
- await new Promise((resolve) => setTimeout(resolve, delay * 1000));
208
+ this.#logger.warn(
209
+ `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,
210
+ );
211
+ await new Promise((resolve) => setTimeout(resolve, delay * 1000));
212
+ } else {
213
+ this.#logger.warn(
214
+ `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,
215
+ );
216
+ }
199
217
  }
200
218
  }
201
219
 
@@ -220,6 +238,20 @@ export class SpeechStream extends stt.SpeechStream {
220
238
  }
221
239
  }, 5000);
222
240
 
241
+ // gets cancelled also when sendTask is complete
242
+ const wsMonitor = Task.from(async (controller) => {
243
+ const closed = new Promise<void>(async (_, reject) => {
244
+ ws.once('close', (code, reason) => {
245
+ if (!closing) {
246
+ this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
247
+ reject(new Error('WebSocket closed'));
248
+ }
249
+ });
250
+ });
251
+
252
+ await Promise.race([closed, waitForAbort(controller.signal)]);
253
+ });
254
+
223
255
  const sendTask = async () => {
224
256
  const samples100Ms = Math.floor(this.#opts.sampleRate / 10);
225
257
  const stream = new AudioByteStream(
@@ -228,48 +260,52 @@ export class SpeechStream extends stt.SpeechStream {
228
260
  samples100Ms,
229
261
  );
230
262
 
231
- for await (const data of this.input) {
232
- let frames: AudioFrame[];
233
- if (data === SpeechStream.FLUSH_SENTINEL) {
234
- frames = stream.flush();
235
- this.#audioDurationCollector.flush();
236
- } else if (
237
- data.sampleRate === this.#opts.sampleRate ||
238
- data.channels === this.#opts.numChannels
239
- ) {
240
- frames = stream.write(data.data.buffer);
241
- } else {
242
- throw new Error(`sample rate or channel count of frame does not match`);
243
- }
263
+ try {
264
+ while (!this.closed) {
265
+ const result = await Promise.race([
266
+ this.input.next(),
267
+ waitForAbort(this.abortController.signal),
268
+ ]);
269
+
270
+ if (result === undefined) return; // aborted
271
+ if (result.done) {
272
+ break;
273
+ }
274
+
275
+ const data = result.value;
276
+
277
+ let frames: AudioFrame[];
278
+ if (data === SpeechStream.FLUSH_SENTINEL) {
279
+ frames = stream.flush();
280
+ this.#audioDurationCollector.flush();
281
+ } else if (
282
+ data.sampleRate === this.#opts.sampleRate ||
283
+ data.channels === this.#opts.numChannels
284
+ ) {
285
+ frames = stream.write(data.data.buffer as ArrayBuffer);
286
+ } else {
287
+ throw new Error(`sample rate or channel count of frame does not match`);
288
+ }
244
289
 
245
- for await (const frame of frames) {
246
- if (this.#audioEnergyFilter.pushFrame(frame)) {
247
- const frameDuration = frame.samplesPerChannel / frame.sampleRate;
248
- this.#audioDurationCollector.push(frameDuration);
249
- ws.send(frame.data.buffer);
290
+ for await (const frame of frames) {
291
+ if (this.#audioEnergyFilter.pushFrame(frame)) {
292
+ const frameDuration = frame.samplesPerChannel / frame.sampleRate;
293
+ this.#audioDurationCollector.push(frameDuration);
294
+ ws.send(frame.data.buffer);
295
+ }
250
296
  }
251
297
  }
298
+ } finally {
299
+ closing = true;
300
+ ws.send(JSON.stringify({ type: 'CloseStream' }));
301
+ wsMonitor.cancel();
252
302
  }
253
-
254
- closing = true;
255
- ws.send(JSON.stringify({ type: 'CloseStream' }));
256
303
  };
257
304
 
258
- const wsMonitor = new Promise<void>((_, reject) =>
259
- ws.once('close', (code, reason) => {
260
- if (!closing) {
261
- this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
262
- reject(new Error('WebSocket closed'));
263
- }
264
- }),
265
- );
266
-
267
- const listenTask = async () => {
268
- while (!this.closed && !closing) {
269
- try {
270
- await new Promise<RawData>((resolve) => {
271
- ws.once('message', (data) => resolve(data));
272
- }).then((msg) => {
305
+ const listenTask = Task.from(async (controller) => {
306
+ const listenMessage = new Promise<void>((resolve, reject) => {
307
+ ws.on('message', (msg) => {
308
+ try {
273
309
  const json = JSON.parse(msg.toString());
274
310
  switch (json['type']) {
275
311
  case 'SpeechStarted': {
@@ -300,7 +336,9 @@ export class SpeechStream extends stt.SpeechStream {
300
336
  if (alternatives[0] && alternatives[0].text) {
301
337
  if (!this.#speaking) {
302
338
  this.#speaking = true;
303
- this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
339
+ this.queue.put({
340
+ type: stt.SpeechEventType.START_OF_SPEECH,
341
+ });
304
342
  }
305
343
 
306
344
  if (isFinal) {
@@ -334,15 +372,24 @@ export class SpeechStream extends stt.SpeechStream {
334
372
  break;
335
373
  }
336
374
  }
337
- });
338
- } catch (error) {
339
- this.#logger.child({ error }).warn('unrecoverable error, exiting');
340
- break;
341
- }
342
- }
343
- };
344
375
 
345
- await Promise.race([this.#resetWS.await, Promise.all([sendTask(), listenTask(), wsMonitor])]);
376
+ if (this.closed || closing) {
377
+ resolve();
378
+ }
379
+ } catch (err) {
380
+ this.#logger.error(`STT: Error processing message: ${msg}`);
381
+ reject(err);
382
+ }
383
+ });
384
+ });
385
+
386
+ await Promise.race([listenMessage, waitForAbort(controller.signal)]);
387
+ }, this.abortController);
388
+
389
+ await Promise.race([
390
+ this.#resetWS.await,
391
+ Promise.all([sendTask(), listenTask.result, wsMonitor]),
392
+ ]);
346
393
  closing = true;
347
394
  ws.close();
348
395
  clearInterval(keepalive);