@livekit/agents-plugin-deepgram 1.0.24 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/stt.cjs CHANGED
@@ -44,7 +44,8 @@ const defaultSTTOptions = {
44
44
  profanityFilter: false,
45
45
  dictation: false,
46
46
  diarize: false,
47
- numerals: false
47
+ numerals: false,
48
+ mipOptOut: false
48
49
  };
49
50
  class STT extends import_agents.stt.STT {
50
51
  #opts;
@@ -89,25 +90,14 @@ class STT extends import_agents.stt.STT {
89
90
  updateOptions(opts) {
90
91
  this.#opts = { ...this.#opts, ...opts };
91
92
  }
92
- stream() {
93
- return new SpeechStream(this, this.#opts, this.abortController);
93
+ stream(options) {
94
+ return new SpeechStream(this, this.#opts, options == null ? void 0 : options.connOptions);
94
95
  }
95
96
  async close() {
96
97
  this.abortController.abort();
97
98
  }
98
99
  }
99
100
  class SpeechStream extends import_agents.stt.SpeechStream {
100
- constructor(stt2, opts, abortController) {
101
- super(stt2, opts.sampleRate);
102
- this.abortController = abortController;
103
- this.#opts = opts;
104
- this.closed = false;
105
- this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
106
- this.#audioDurationCollector = new import_utils.PeriodicCollector(
107
- (duration) => this.onAudioDurationReport(duration),
108
- { duration: 5 }
109
- );
110
- }
111
101
  #opts;
112
102
  #audioEnergyFilter;
113
103
  #logger = (0, import_agents.log)();
@@ -116,6 +106,16 @@ class SpeechStream extends import_agents.stt.SpeechStream {
116
106
  #requestId = "";
117
107
  #audioDurationCollector;
118
108
  label = "deepgram.SpeechStream";
109
+ constructor(stt2, opts, connOptions) {
110
+ super(stt2, opts.sampleRate, connOptions);
111
+ this.#opts = opts;
112
+ this.closed = false;
113
+ this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
114
+ this.#audioDurationCollector = new import_utils.PeriodicCollector(
115
+ (duration) => this.onAudioDurationReport(duration),
116
+ { duration: 5 }
117
+ );
118
+ }
119
119
  async run() {
120
120
  const maxRetry = 32;
121
121
  let retries = 0;
@@ -140,7 +140,8 @@ class SpeechStream extends import_agents.stt.SpeechStream {
140
140
  keywords: this.#opts.keywords.map((x) => x.join(":")),
141
141
  keyterm: this.#opts.keyterm,
142
142
  profanity_filter: this.#opts.profanityFilter,
143
- language: this.#opts.language
143
+ language: this.#opts.language,
144
+ mip_opt_out: this.#opts.mipOptOut
144
145
  };
145
146
  Object.entries(params).forEach(([k, v]) => {
146
147
  if (v !== void 0) {
@@ -214,12 +215,10 @@ class SpeechStream extends import_agents.stt.SpeechStream {
214
215
  this.#opts.numChannels,
215
216
  samples100Ms
216
217
  );
218
+ const abortPromise = (0, import_agents.waitForAbort)(this.abortSignal);
217
219
  try {
218
220
  while (!this.closed) {
219
- const result = await Promise.race([
220
- this.input.next(),
221
- (0, import_agents.waitForAbort)(this.abortController.signal)
222
- ]);
221
+ const result = await Promise.race([this.input.next(), abortPromise]);
223
222
  if (result === void 0) return;
224
223
  if (result.done) {
225
224
  break;
@@ -249,6 +248,14 @@ class SpeechStream extends import_agents.stt.SpeechStream {
249
248
  }
250
249
  };
251
250
  const listenTask = import_agents.Task.from(async (controller) => {
251
+ const putMessage = (message) => {
252
+ if (!this.queue.closed) {
253
+ try {
254
+ this.queue.put(message);
255
+ } catch (e) {
256
+ }
257
+ }
258
+ };
252
259
  const listenMessage = new Promise((resolve, reject) => {
253
260
  ws.on("message", (msg) => {
254
261
  try {
@@ -257,12 +264,7 @@ class SpeechStream extends import_agents.stt.SpeechStream {
257
264
  case "SpeechStarted": {
258
265
  if (this.#speaking) return;
259
266
  this.#speaking = true;
260
- if (!this.queue.closed) {
261
- try {
262
- this.queue.put({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
263
- } catch (e) {
264
- }
265
- }
267
+ putMessage({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
266
268
  break;
267
269
  }
268
270
  // see this page:
@@ -278,17 +280,17 @@ class SpeechStream extends import_agents.stt.SpeechStream {
278
280
  if (alternatives[0] && alternatives[0].text) {
279
281
  if (!this.#speaking) {
280
282
  this.#speaking = true;
281
- this.queue.put({
283
+ putMessage({
282
284
  type: import_agents.stt.SpeechEventType.START_OF_SPEECH
283
285
  });
284
286
  }
285
287
  if (isFinal) {
286
- this.queue.put({
288
+ putMessage({
287
289
  type: import_agents.stt.SpeechEventType.FINAL_TRANSCRIPT,
288
290
  alternatives: [alternatives[0], ...alternatives.slice(1)]
289
291
  });
290
292
  } else {
291
- this.queue.put({
293
+ putMessage({
292
294
  type: import_agents.stt.SpeechEventType.INTERIM_TRANSCRIPT,
293
295
  alternatives: [alternatives[0], ...alternatives.slice(1)]
294
296
  });
@@ -296,7 +298,7 @@ class SpeechStream extends import_agents.stt.SpeechStream {
296
298
  }
297
299
  if (isEndpoint && this.#speaking) {
298
300
  this.#speaking = false;
299
- this.queue.put({ type: import_agents.stt.SpeechEventType.END_OF_SPEECH });
301
+ putMessage({ type: import_agents.stt.SpeechEventType.END_OF_SPEECH });
300
302
  }
301
303
  break;
302
304
  }
package/dist/stt.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n if (!this.queue.closed) {\n try {\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n } catch (e) {\n // ignore\n }\n }\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBASO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,gBAChB,4BAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,sBAAI;AACF,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D,SAAS,GAAG;AAAA,kBAEZ;AAAA,gBACF;AACA;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAUO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
package/dist/stt.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { type AudioBuffer, stt } from '@livekit/agents';
1
+ import { type APIConnectOptions, type AudioBuffer, stt } from '@livekit/agents';
2
2
  import type { STTLanguages, STTModels } from './models.js';
3
3
  export interface STTOptions {
4
4
  apiKey?: string;
@@ -19,6 +19,7 @@ export interface STTOptions {
19
19
  dictation: boolean;
20
20
  diarize: boolean;
21
21
  numerals: boolean;
22
+ mipOptOut: boolean;
22
23
  }
23
24
  export declare class STT extends stt.STT {
24
25
  #private;
@@ -27,14 +28,15 @@ export declare class STT extends stt.STT {
27
28
  constructor(opts?: Partial<STTOptions>);
28
29
  _recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
29
30
  updateOptions(opts: Partial<STTOptions>): void;
30
- stream(): SpeechStream;
31
+ stream(options?: {
32
+ connOptions?: APIConnectOptions;
33
+ }): SpeechStream;
31
34
  close(): Promise<void>;
32
35
  }
33
36
  export declare class SpeechStream extends stt.SpeechStream {
34
37
  #private;
35
- private abortController;
36
38
  label: string;
37
- constructor(stt: STT, opts: STTOptions, abortController: AbortController);
39
+ constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions);
38
40
  protected run(): Promise<void>;
39
41
  updateOptions(opts: Partial<STTOptions>): void;
40
42
  private onAudioDurationReport;
package/dist/stt.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { type AudioBuffer, stt } from '@livekit/agents';
1
+ import { type APIConnectOptions, type AudioBuffer, stt } from '@livekit/agents';
2
2
  import type { STTLanguages, STTModels } from './models.js';
3
3
  export interface STTOptions {
4
4
  apiKey?: string;
@@ -19,6 +19,7 @@ export interface STTOptions {
19
19
  dictation: boolean;
20
20
  diarize: boolean;
21
21
  numerals: boolean;
22
+ mipOptOut: boolean;
22
23
  }
23
24
  export declare class STT extends stt.STT {
24
25
  #private;
@@ -27,14 +28,15 @@ export declare class STT extends stt.STT {
27
28
  constructor(opts?: Partial<STTOptions>);
28
29
  _recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
29
30
  updateOptions(opts: Partial<STTOptions>): void;
30
- stream(): SpeechStream;
31
+ stream(options?: {
32
+ connOptions?: APIConnectOptions;
33
+ }): SpeechStream;
31
34
  close(): Promise<void>;
32
35
  }
33
36
  export declare class SpeechStream extends stt.SpeechStream {
34
37
  #private;
35
- private abortController;
36
38
  label: string;
37
- constructor(stt: STT, opts: STTOptions, abortController: AbortController);
39
+ constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions);
38
40
  protected run(): Promise<void>;
39
41
  updateOptions(opts: Partial<STTOptions>): void;
40
42
  private onAudioDurationReport;
package/dist/stt.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAuBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,IAAI,YAAY;IAIhB,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAa9C,OAAO,CAAC,eAAe;IALzB,KAAK,SAA2B;gBAG9B,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,UAAU,EACR,eAAe,EAAE,eAAe;cAY1B,GAAG;IAyEnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAqLvC,OAAO,CAAC,qBAAqB;CAU9B"}
1
+ {"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACpB;AAwBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;IAI7D,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAQhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE,iBAAiB;cAWvD,GAAG;IA0EnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IA0LvC,OAAO,CAAC,qBAAqB;CAU9B"}
package/dist/stt.js CHANGED
@@ -28,7 +28,8 @@ const defaultSTTOptions = {
28
28
  profanityFilter: false,
29
29
  dictation: false,
30
30
  diarize: false,
31
- numerals: false
31
+ numerals: false,
32
+ mipOptOut: false
32
33
  };
33
34
  class STT extends stt.STT {
34
35
  #opts;
@@ -73,25 +74,14 @@ class STT extends stt.STT {
73
74
  updateOptions(opts) {
74
75
  this.#opts = { ...this.#opts, ...opts };
75
76
  }
76
- stream() {
77
- return new SpeechStream(this, this.#opts, this.abortController);
77
+ stream(options) {
78
+ return new SpeechStream(this, this.#opts, options == null ? void 0 : options.connOptions);
78
79
  }
79
80
  async close() {
80
81
  this.abortController.abort();
81
82
  }
82
83
  }
83
84
  class SpeechStream extends stt.SpeechStream {
84
- constructor(stt2, opts, abortController) {
85
- super(stt2, opts.sampleRate);
86
- this.abortController = abortController;
87
- this.#opts = opts;
88
- this.closed = false;
89
- this.#audioEnergyFilter = new AudioEnergyFilter();
90
- this.#audioDurationCollector = new PeriodicCollector(
91
- (duration) => this.onAudioDurationReport(duration),
92
- { duration: 5 }
93
- );
94
- }
95
85
  #opts;
96
86
  #audioEnergyFilter;
97
87
  #logger = log();
@@ -100,6 +90,16 @@ class SpeechStream extends stt.SpeechStream {
100
90
  #requestId = "";
101
91
  #audioDurationCollector;
102
92
  label = "deepgram.SpeechStream";
93
+ constructor(stt2, opts, connOptions) {
94
+ super(stt2, opts.sampleRate, connOptions);
95
+ this.#opts = opts;
96
+ this.closed = false;
97
+ this.#audioEnergyFilter = new AudioEnergyFilter();
98
+ this.#audioDurationCollector = new PeriodicCollector(
99
+ (duration) => this.onAudioDurationReport(duration),
100
+ { duration: 5 }
101
+ );
102
+ }
103
103
  async run() {
104
104
  const maxRetry = 32;
105
105
  let retries = 0;
@@ -124,7 +124,8 @@ class SpeechStream extends stt.SpeechStream {
124
124
  keywords: this.#opts.keywords.map((x) => x.join(":")),
125
125
  keyterm: this.#opts.keyterm,
126
126
  profanity_filter: this.#opts.profanityFilter,
127
- language: this.#opts.language
127
+ language: this.#opts.language,
128
+ mip_opt_out: this.#opts.mipOptOut
128
129
  };
129
130
  Object.entries(params).forEach(([k, v]) => {
130
131
  if (v !== void 0) {
@@ -198,12 +199,10 @@ class SpeechStream extends stt.SpeechStream {
198
199
  this.#opts.numChannels,
199
200
  samples100Ms
200
201
  );
202
+ const abortPromise = waitForAbort(this.abortSignal);
201
203
  try {
202
204
  while (!this.closed) {
203
- const result = await Promise.race([
204
- this.input.next(),
205
- waitForAbort(this.abortController.signal)
206
- ]);
205
+ const result = await Promise.race([this.input.next(), abortPromise]);
207
206
  if (result === void 0) return;
208
207
  if (result.done) {
209
208
  break;
@@ -233,6 +232,14 @@ class SpeechStream extends stt.SpeechStream {
233
232
  }
234
233
  };
235
234
  const listenTask = Task.from(async (controller) => {
235
+ const putMessage = (message) => {
236
+ if (!this.queue.closed) {
237
+ try {
238
+ this.queue.put(message);
239
+ } catch (e) {
240
+ }
241
+ }
242
+ };
236
243
  const listenMessage = new Promise((resolve, reject) => {
237
244
  ws.on("message", (msg) => {
238
245
  try {
@@ -241,12 +248,7 @@ class SpeechStream extends stt.SpeechStream {
241
248
  case "SpeechStarted": {
242
249
  if (this.#speaking) return;
243
250
  this.#speaking = true;
244
- if (!this.queue.closed) {
245
- try {
246
- this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
247
- } catch (e) {
248
- }
249
- }
251
+ putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });
250
252
  break;
251
253
  }
252
254
  // see this page:
@@ -262,17 +264,17 @@ class SpeechStream extends stt.SpeechStream {
262
264
  if (alternatives[0] && alternatives[0].text) {
263
265
  if (!this.#speaking) {
264
266
  this.#speaking = true;
265
- this.queue.put({
267
+ putMessage({
266
268
  type: stt.SpeechEventType.START_OF_SPEECH
267
269
  });
268
270
  }
269
271
  if (isFinal) {
270
- this.queue.put({
272
+ putMessage({
271
273
  type: stt.SpeechEventType.FINAL_TRANSCRIPT,
272
274
  alternatives: [alternatives[0], ...alternatives.slice(1)]
273
275
  });
274
276
  } else {
275
- this.queue.put({
277
+ putMessage({
276
278
  type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
277
279
  alternatives: [alternatives[0], ...alternatives.slice(1)]
278
280
  });
@@ -280,7 +282,7 @@ class SpeechStream extends stt.SpeechStream {
280
282
  }
281
283
  if (isEndpoint && this.#speaking) {
282
284
  this.#speaking = false;
283
- this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
285
+ putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });
284
286
  }
285
287
  break;
286
288
  }
package/dist/stt.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n if (!this.queue.closed) {\n try {\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n } catch (e) {\n // ignore\n }\n }\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,YAChB,aAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,sBAAI;AACF,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D,SAAS,GAAG;AAAA,kBAEZ;AAAA,gBACF;AACA;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
1
+ {"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,eAAe,aAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
package/dist/tts.cjs CHANGED
@@ -60,8 +60,8 @@ class TTS extends import_agents.tts.TTS {
60
60
  );
61
61
  }
62
62
  }
63
- synthesize(text) {
64
- return new ChunkedStream(this, text, this.opts);
63
+ synthesize(text, connOptions, abortSignal) {
64
+ return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
65
65
  }
66
66
  stream() {
67
67
  return new SynthesizeStream(this, this.opts);
@@ -69,10 +69,11 @@ class TTS extends import_agents.tts.TTS {
69
69
  }
70
70
  class ChunkedStream extends import_agents.tts.ChunkedStream {
71
71
  label = "deepgram.ChunkedStream";
72
+ #logger = (0, import_agents.log)();
72
73
  opts;
73
74
  text;
74
- constructor(tts2, text, opts) {
75
- super(text, tts2);
75
+ constructor(tts2, text, opts, connOptions, abortSignal) {
76
+ super(text, tts2, connOptions, abortSignal);
76
77
  this.text = text;
77
78
  this.opts = opts;
78
79
  }
@@ -94,7 +95,8 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
94
95
  headers: {
95
96
  [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey}`,
96
97
  "Content-Type": "application/json"
97
- }
98
+ },
99
+ signal: this.abortSignal
98
100
  },
99
101
  (res) => {
100
102
  if (res.statusCode !== 200) {
@@ -116,7 +118,8 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
116
118
  }
117
119
  });
118
120
  res.on("error", (err) => {
119
- reject(err);
121
+ if (err.message === "aborted") return;
122
+ this.#logger.error({ err }, "Deepgram TTS response error");
120
123
  });
121
124
  res.on("close", () => {
122
125
  for (const frame of bstream.flush()) {
@@ -137,8 +140,10 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
137
140
  }
138
141
  );
139
142
  req.on("error", (err) => {
140
- reject(err);
143
+ if (err.name === "AbortError") return;
144
+ this.#logger.error({ err }, "Deepgram TTS request error");
141
145
  });
146
+ req.on("close", () => resolve());
142
147
  req.write(JSON.stringify(json));
143
148
  req.end();
144
149
  });
package/dist/tts.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n private opts: TTSOptions;\n private text: string;\n\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n reject(err);\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if (message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA0D;AAE1D,wBAAwB;AACxB,gBAAwC;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAzC9C;AA0CI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,IAAI;AAAA,EAChD;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,UAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,QACF;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,mBAAO,GAAG;AAAA,UACZ,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,eAAO,GAAG;AAAA,MACZ,CAAC;AAED,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,oBAAU,QAAQ,GAAG,eAAe,oBAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,oBAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n #logger = log();\n private opts: TTSOptions;\n private text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n signal: this.abortSignal,\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Deepgram TTS response error');\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Deepgram TTS request error');\n });\n\n req.on('close', () => resolve());\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if (message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,wBAAwB;AACxB,gBAAwC;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAhD9C;AAiDI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,MAAM,aAAa,WAAW;AAAA,EAC1E;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,cAAU,mBAAI;AAAA,EACN;AAAA,EACA;AAAA,EAER,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,UAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,UACA,QAAQ,KAAK;AAAA,QACf;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,gBAAI,IAAI,YAAY,UAAW;AAC/B,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,UAC3D,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,YAAI,IAAI,SAAS,aAAc;AAC/B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,MAC1D,CAAC;AAED,UAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAC/B,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,oBAAU,QAAQ,GAAG,eAAe,oBAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,oBAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
package/dist/tts.d.cts CHANGED
@@ -1,4 +1,4 @@
1
- import { tokenize, tts } from '@livekit/agents';
1
+ import { type APIConnectOptions, tokenize, tts } from '@livekit/agents';
2
2
  import type { TTSEncoding, TTSModels } from './models.js';
3
3
  export interface TTSOptions {
4
4
  model: TTSModels | string;
@@ -13,14 +13,15 @@ export declare class TTS extends tts.TTS {
13
13
  private opts;
14
14
  label: string;
15
15
  constructor(opts?: Partial<TTSOptions>);
16
- synthesize(text: string): tts.ChunkedStream;
16
+ synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): tts.ChunkedStream;
17
17
  stream(): tts.SynthesizeStream;
18
18
  }
19
19
  export declare class ChunkedStream extends tts.ChunkedStream {
20
+ #private;
20
21
  label: string;
21
22
  private opts;
22
23
  private text;
23
- constructor(tts: TTS, text: string, opts: TTSOptions);
24
+ constructor(tts: TTS, text: string, opts: TTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
24
25
  protected run(): Promise<void>;
25
26
  }
26
27
  export declare class SynthesizeStream extends tts.SynthesizeStream {
package/dist/tts.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { tokenize, tts } from '@livekit/agents';
1
+ import { type APIConnectOptions, tokenize, tts } from '@livekit/agents';
2
2
  import type { TTSEncoding, TTSModels } from './models.js';
3
3
  export interface TTSOptions {
4
4
  model: TTSModels | string;
@@ -13,14 +13,15 @@ export declare class TTS extends tts.TTS {
13
13
  private opts;
14
14
  label: string;
15
15
  constructor(opts?: Partial<TTSOptions>);
16
- synthesize(text: string): tts.ChunkedStream;
16
+ synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): tts.ChunkedStream;
17
17
  stream(): tts.SynthesizeStream;
18
18
  }
19
19
  export declare class ChunkedStream extends tts.ChunkedStream {
20
+ #private;
20
21
  label: string;
21
22
  private opts;
22
23
  private text;
23
- constructor(tts: TTS, text: string, opts: TTSOptions);
24
+ constructor(tts: TTS, text: string, opts: TTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
24
25
  protected run(): Promise<void>;
25
26
  }
26
27
  export declare class SynthesizeStream extends tts.SynthesizeStream {
package/dist/tts.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EAA8B,QAAQ,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAI5E,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAM1D,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,QAAQ,CAAC,iBAAiB,CAAC;IAC9C,YAAY,EAAE,GAAG,CAAC,eAAe,CAAC;CACnC;AAgBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,OAAO,CAAC,IAAI,CAAa;IACzB,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiB1C,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,aAAa;IAI3C,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;IAClD,KAAK,SAA4B;IACjC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAS;gBAET,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU;cAMpC,GAAG;CAyEpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,SAAS,CAA0B;IAC3C,KAAK,SAA+B;IAEpC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAqC;IACtE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAqC;gBAE1D,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;YAMxB,cAAc;cA4CZ,GAAG;CA4IpB"}
1
+ {"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAItB,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAM1D,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,QAAQ,CAAC,iBAAiB,CAAC;IAC9C,YAAY,EAAE,GAAG,CAAC,eAAe,CAAC;CACnC;AAgBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,OAAO,CAAC,IAAI,CAAa;IACzB,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiB1C,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,GAAG,CAAC,aAAa;IAIpB,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;IAEjC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAS;gBAGnB,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,UAAU,EAChB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAOX,GAAG;CA6EpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,SAAS,CAA0B;IAC3C,KAAK,SAA+B;IAEpC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAqC;IACtE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAqC;gBAE1D,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;YAMxB,cAAc;cA4CZ,GAAG;CA4IpB"}
package/dist/tts.js CHANGED
@@ -1,4 +1,10 @@
1
- import { AudioByteStream, shortuuid, tokenize, tts } from "@livekit/agents";
1
+ import {
2
+ AudioByteStream,
3
+ log,
4
+ shortuuid,
5
+ tokenize,
6
+ tts
7
+ } from "@livekit/agents";
2
8
  import { request } from "node:https";
3
9
  import { WebSocket } from "ws";
4
10
  const AUTHORIZATION_HEADER = "Authorization";
@@ -35,8 +41,8 @@ class TTS extends tts.TTS {
35
41
  );
36
42
  }
37
43
  }
38
- synthesize(text) {
39
- return new ChunkedStream(this, text, this.opts);
44
+ synthesize(text, connOptions, abortSignal) {
45
+ return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
40
46
  }
41
47
  stream() {
42
48
  return new SynthesizeStream(this, this.opts);
@@ -44,10 +50,11 @@ class TTS extends tts.TTS {
44
50
  }
45
51
  class ChunkedStream extends tts.ChunkedStream {
46
52
  label = "deepgram.ChunkedStream";
53
+ #logger = log();
47
54
  opts;
48
55
  text;
49
- constructor(tts2, text, opts) {
50
- super(text, tts2);
56
+ constructor(tts2, text, opts, connOptions, abortSignal) {
57
+ super(text, tts2, connOptions, abortSignal);
51
58
  this.text = text;
52
59
  this.opts = opts;
53
60
  }
@@ -69,7 +76,8 @@ class ChunkedStream extends tts.ChunkedStream {
69
76
  headers: {
70
77
  [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey}`,
71
78
  "Content-Type": "application/json"
72
- }
79
+ },
80
+ signal: this.abortSignal
73
81
  },
74
82
  (res) => {
75
83
  if (res.statusCode !== 200) {
@@ -91,7 +99,8 @@ class ChunkedStream extends tts.ChunkedStream {
91
99
  }
92
100
  });
93
101
  res.on("error", (err) => {
94
- reject(err);
102
+ if (err.message === "aborted") return;
103
+ this.#logger.error({ err }, "Deepgram TTS response error");
95
104
  });
96
105
  res.on("close", () => {
97
106
  for (const frame of bstream.flush()) {
@@ -112,8 +121,10 @@ class ChunkedStream extends tts.ChunkedStream {
112
121
  }
113
122
  );
114
123
  req.on("error", (err) => {
115
- reject(err);
124
+ if (err.name === "AbortError") return;
125
+ this.#logger.error({ err }, "Deepgram TTS request error");
116
126
  });
127
+ req.on("close", () => resolve());
117
128
  req.write(JSON.stringify(json));
118
129
  req.end();
119
130
  });
package/dist/tts.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n private opts: TTSOptions;\n private text: string;\n\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n reject(err);\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if (message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":"AAGA,SAAS,iBAAiB,WAAW,UAAU,WAAW;AAE1D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,SAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAzC9C;AA0CI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,IAAI;AAAA,EAChD;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,MAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,QACF;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,mBAAO,GAAG;AAAA,UACZ,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,eAAO,GAAG;AAAA,MACZ,CAAC;AAED,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
1
+ {"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n #logger = log();\n private opts: TTSOptions;\n private text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n signal: this.abortSignal,\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Deepgram TTS response error');\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Deepgram TTS request error');\n });\n\n req.on('close', () => resolve());\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if (message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n }\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,SAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAhD9C;AAiDI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,MAAM,aAAa,WAAW;AAAA,EAC1E;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACN;AAAA,EACA;AAAA,EAER,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,MAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,UACA,QAAQ,KAAK;AAAA,QACf;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,gBAAI,IAAI,YAAY,UAAW;AAC/B,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,UAC3D,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,YAAI,IAAI,SAAS,aAAc;AAC/B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,MAC1D,CAAC;AAED,UAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAC/B,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents-plugin-deepgram",
3
- "version": "1.0.24",
3
+ "version": "1.0.27",
4
4
  "description": "Deepgram plugin for LiveKit Agents for Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -30,16 +30,16 @@
30
30
  "@types/ws": "^8.5.10",
31
31
  "tsup": "^8.3.5",
32
32
  "typescript": "^5.0.0",
33
- "@livekit/agents": "1.0.24",
34
- "@livekit/agents-plugin-silero": "1.0.24",
35
- "@livekit/agents-plugins-test": "1.0.24"
33
+ "@livekit/agents": "1.0.27",
34
+ "@livekit/agents-plugin-silero": "1.0.27",
35
+ "@livekit/agents-plugins-test": "1.0.27"
36
36
  },
37
37
  "dependencies": {
38
38
  "ws": "^8.16.0"
39
39
  },
40
40
  "peerDependencies": {
41
41
  "@livekit/rtc-node": "^0.13.12",
42
- "@livekit/agents": "1.0.24"
42
+ "@livekit/agents": "1.0.27"
43
43
  },
44
44
  "scripts": {
45
45
  "build": "tsup --onSuccess \"pnpm build:types\"",
package/src/stt.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import {
5
+ type APIConnectOptions,
5
6
  type AudioBuffer,
6
7
  AudioByteStream,
7
8
  AudioEnergyFilter,
@@ -37,6 +38,7 @@ export interface STTOptions {
37
38
  dictation: boolean;
38
39
  diarize: boolean;
39
40
  numerals: boolean;
41
+ mipOptOut: boolean;
40
42
  }
41
43
 
42
44
  const defaultSTTOptions: STTOptions = {
@@ -58,6 +60,7 @@ const defaultSTTOptions: STTOptions = {
58
60
  dictation: false,
59
61
  diarize: false,
60
62
  numerals: false,
63
+ mipOptOut: false,
61
64
  };
62
65
 
63
66
  export class STT extends stt.STT {
@@ -113,8 +116,8 @@ export class STT extends stt.STT {
113
116
  this.#opts = { ...this.#opts, ...opts };
114
117
  }
115
118
 
116
- stream(): SpeechStream {
117
- return new SpeechStream(this, this.#opts, this.abortController);
119
+ stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {
120
+ return new SpeechStream(this, this.#opts, options?.connOptions);
118
121
  }
119
122
 
120
123
  async close() {
@@ -132,12 +135,8 @@ export class SpeechStream extends stt.SpeechStream {
132
135
  #audioDurationCollector: PeriodicCollector<number>;
133
136
  label = 'deepgram.SpeechStream';
134
137
 
135
- constructor(
136
- stt: STT,
137
- opts: STTOptions,
138
- private abortController: AbortController,
139
- ) {
140
- super(stt, opts.sampleRate);
138
+ constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {
139
+ super(stt, opts.sampleRate, connOptions);
141
140
  this.#opts = opts;
142
141
  this.closed = false;
143
142
  this.#audioEnergyFilter = new AudioEnergyFilter();
@@ -173,6 +172,7 @@ export class SpeechStream extends stt.SpeechStream {
173
172
  keyterm: this.#opts.keyterm,
174
173
  profanity_filter: this.#opts.profanityFilter,
175
174
  language: this.#opts.language,
175
+ mip_opt_out: this.#opts.mipOptOut,
176
176
  };
177
177
  Object.entries(params).forEach(([k, v]) => {
178
178
  if (v !== undefined) {
@@ -260,12 +260,13 @@ export class SpeechStream extends stt.SpeechStream {
260
260
  samples100Ms,
261
261
  );
262
262
 
263
+ // waitForAbort internally sets up an abort listener on the abort signal
264
+ // we need to put it outside loop to avoid constant re-registration of the listener
265
+ const abortPromise = waitForAbort(this.abortSignal);
266
+
263
267
  try {
264
268
  while (!this.closed) {
265
- const result = await Promise.race([
266
- this.input.next(),
267
- waitForAbort(this.abortController.signal),
268
- ]);
269
+ const result = await Promise.race([this.input.next(), abortPromise]);
269
270
 
270
271
  if (result === undefined) return; // aborted
271
272
  if (result.done) {
@@ -303,6 +304,16 @@ export class SpeechStream extends stt.SpeechStream {
303
304
  };
304
305
 
305
306
  const listenTask = Task.from(async (controller) => {
307
+ const putMessage = (message: stt.SpeechEvent) => {
308
+ if (!this.queue.closed) {
309
+ try {
310
+ this.queue.put(message);
311
+ } catch (e) {
312
+ // ignore
313
+ }
314
+ }
315
+ };
316
+
306
317
  const listenMessage = new Promise<void>((resolve, reject) => {
307
318
  ws.on('message', (msg) => {
308
319
  try {
@@ -315,13 +326,7 @@ export class SpeechStream extends stt.SpeechStream {
315
326
  // It's also possible we receive a transcript without a SpeechStarted event.
316
327
  if (this.#speaking) return;
317
328
  this.#speaking = true;
318
- if (!this.queue.closed) {
319
- try {
320
- this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
321
- } catch (e) {
322
- // ignore
323
- }
324
- }
329
+ putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });
325
330
  break;
326
331
  }
327
332
  // see this page:
@@ -342,18 +347,18 @@ export class SpeechStream extends stt.SpeechStream {
342
347
  if (alternatives[0] && alternatives[0].text) {
343
348
  if (!this.#speaking) {
344
349
  this.#speaking = true;
345
- this.queue.put({
350
+ putMessage({
346
351
  type: stt.SpeechEventType.START_OF_SPEECH,
347
352
  });
348
353
  }
349
354
 
350
355
  if (isFinal) {
351
- this.queue.put({
356
+ putMessage({
352
357
  type: stt.SpeechEventType.FINAL_TRANSCRIPT,
353
358
  alternatives: [alternatives[0], ...alternatives.slice(1)],
354
359
  });
355
360
  } else {
356
- this.queue.put({
361
+ putMessage({
357
362
  type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
358
363
  alternatives: [alternatives[0], ...alternatives.slice(1)],
359
364
  });
@@ -365,7 +370,7 @@ export class SpeechStream extends stt.SpeechStream {
365
370
  // a non-empty transcript (deepgram doesn't have a SpeechEnded event)
366
371
  if (isEndpoint && this.#speaking) {
367
372
  this.#speaking = false;
368
- this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });
373
+ putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });
369
374
  }
370
375
 
371
376
  break;
package/src/tts.ts CHANGED
@@ -1,7 +1,14 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { AudioByteStream, shortuuid, tokenize, tts } from '@livekit/agents';
4
+ import {
5
+ type APIConnectOptions,
6
+ AudioByteStream,
7
+ log,
8
+ shortuuid,
9
+ tokenize,
10
+ tts,
11
+ } from '@livekit/agents';
5
12
  import type { AudioFrame } from '@livekit/rtc-node';
6
13
  import { request } from 'node:https';
7
14
  import { type RawData, WebSocket } from 'ws';
@@ -56,8 +63,12 @@ export class TTS extends tts.TTS {
56
63
  }
57
64
  }
58
65
 
59
- synthesize(text: string): tts.ChunkedStream {
60
- return new ChunkedStream(this, text, this.opts);
66
+ synthesize(
67
+ text: string,
68
+ connOptions?: APIConnectOptions,
69
+ abortSignal?: AbortSignal,
70
+ ): tts.ChunkedStream {
71
+ return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
61
72
  }
62
73
 
63
74
  stream(): tts.SynthesizeStream {
@@ -67,11 +78,18 @@ export class TTS extends tts.TTS {
67
78
 
68
79
  export class ChunkedStream extends tts.ChunkedStream {
69
80
  label = 'deepgram.ChunkedStream';
81
+ #logger = log();
70
82
  private opts: TTSOptions;
71
83
  private text: string;
72
84
 
73
- constructor(tts: TTS, text: string, opts: TTSOptions) {
74
- super(text, tts);
85
+ constructor(
86
+ tts: TTS,
87
+ text: string,
88
+ opts: TTSOptions,
89
+ connOptions?: APIConnectOptions,
90
+ abortSignal?: AbortSignal,
91
+ ) {
92
+ super(text, tts, connOptions, abortSignal);
75
93
  this.text = text;
76
94
  this.opts = opts;
77
95
  }
@@ -96,6 +114,7 @@ export class ChunkedStream extends tts.ChunkedStream {
96
114
  [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,
97
115
  'Content-Type': 'application/json',
98
116
  },
117
+ signal: this.abortSignal,
99
118
  },
100
119
  (res) => {
101
120
  if (res.statusCode !== 200) {
@@ -119,7 +138,8 @@ export class ChunkedStream extends tts.ChunkedStream {
119
138
  });
120
139
 
121
140
  res.on('error', (err) => {
122
- reject(err);
141
+ if (err.message === 'aborted') return;
142
+ this.#logger.error({ err }, 'Deepgram TTS response error');
123
143
  });
124
144
 
125
145
  res.on('close', () => {
@@ -142,9 +162,11 @@ export class ChunkedStream extends tts.ChunkedStream {
142
162
  );
143
163
 
144
164
  req.on('error', (err) => {
145
- reject(err);
165
+ if (err.name === 'AbortError') return;
166
+ this.#logger.error({ err }, 'Deepgram TTS request error');
146
167
  });
147
168
 
169
+ req.on('close', () => resolve());
148
170
  req.write(JSON.stringify(json));
149
171
  req.end();
150
172
  });