@livekit/agents-plugin-deepgram 1.0.24 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stt.cjs +31 -29
- package/dist/stt.cjs.map +1 -1
- package/dist/stt.d.cts +6 -4
- package/dist/stt.d.ts +6 -4
- package/dist/stt.d.ts.map +1 -1
- package/dist/stt.js +31 -29
- package/dist/stt.js.map +1 -1
- package/dist/tts.cjs +12 -7
- package/dist/tts.cjs.map +1 -1
- package/dist/tts.d.cts +4 -3
- package/dist/tts.d.ts +4 -3
- package/dist/tts.d.ts.map +1 -1
- package/dist/tts.js +19 -8
- package/dist/tts.js.map +1 -1
- package/package.json +5 -5
- package/src/stt.ts +28 -23
- package/src/tts.ts +29 -7
package/dist/stt.cjs
CHANGED
|
@@ -44,7 +44,8 @@ const defaultSTTOptions = {
|
|
|
44
44
|
profanityFilter: false,
|
|
45
45
|
dictation: false,
|
|
46
46
|
diarize: false,
|
|
47
|
-
numerals: false
|
|
47
|
+
numerals: false,
|
|
48
|
+
mipOptOut: false
|
|
48
49
|
};
|
|
49
50
|
class STT extends import_agents.stt.STT {
|
|
50
51
|
#opts;
|
|
@@ -89,25 +90,14 @@ class STT extends import_agents.stt.STT {
|
|
|
89
90
|
updateOptions(opts) {
|
|
90
91
|
this.#opts = { ...this.#opts, ...opts };
|
|
91
92
|
}
|
|
92
|
-
stream() {
|
|
93
|
-
return new SpeechStream(this, this.#opts,
|
|
93
|
+
stream(options) {
|
|
94
|
+
return new SpeechStream(this, this.#opts, options == null ? void 0 : options.connOptions);
|
|
94
95
|
}
|
|
95
96
|
async close() {
|
|
96
97
|
this.abortController.abort();
|
|
97
98
|
}
|
|
98
99
|
}
|
|
99
100
|
class SpeechStream extends import_agents.stt.SpeechStream {
|
|
100
|
-
constructor(stt2, opts, abortController) {
|
|
101
|
-
super(stt2, opts.sampleRate);
|
|
102
|
-
this.abortController = abortController;
|
|
103
|
-
this.#opts = opts;
|
|
104
|
-
this.closed = false;
|
|
105
|
-
this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
|
|
106
|
-
this.#audioDurationCollector = new import_utils.PeriodicCollector(
|
|
107
|
-
(duration) => this.onAudioDurationReport(duration),
|
|
108
|
-
{ duration: 5 }
|
|
109
|
-
);
|
|
110
|
-
}
|
|
111
101
|
#opts;
|
|
112
102
|
#audioEnergyFilter;
|
|
113
103
|
#logger = (0, import_agents.log)();
|
|
@@ -116,6 +106,16 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
116
106
|
#requestId = "";
|
|
117
107
|
#audioDurationCollector;
|
|
118
108
|
label = "deepgram.SpeechStream";
|
|
109
|
+
constructor(stt2, opts, connOptions) {
|
|
110
|
+
super(stt2, opts.sampleRate, connOptions);
|
|
111
|
+
this.#opts = opts;
|
|
112
|
+
this.closed = false;
|
|
113
|
+
this.#audioEnergyFilter = new import_agents.AudioEnergyFilter();
|
|
114
|
+
this.#audioDurationCollector = new import_utils.PeriodicCollector(
|
|
115
|
+
(duration) => this.onAudioDurationReport(duration),
|
|
116
|
+
{ duration: 5 }
|
|
117
|
+
);
|
|
118
|
+
}
|
|
119
119
|
async run() {
|
|
120
120
|
const maxRetry = 32;
|
|
121
121
|
let retries = 0;
|
|
@@ -140,7 +140,8 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
140
140
|
keywords: this.#opts.keywords.map((x) => x.join(":")),
|
|
141
141
|
keyterm: this.#opts.keyterm,
|
|
142
142
|
profanity_filter: this.#opts.profanityFilter,
|
|
143
|
-
language: this.#opts.language
|
|
143
|
+
language: this.#opts.language,
|
|
144
|
+
mip_opt_out: this.#opts.mipOptOut
|
|
144
145
|
};
|
|
145
146
|
Object.entries(params).forEach(([k, v]) => {
|
|
146
147
|
if (v !== void 0) {
|
|
@@ -214,12 +215,10 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
214
215
|
this.#opts.numChannels,
|
|
215
216
|
samples100Ms
|
|
216
217
|
);
|
|
218
|
+
const abortPromise = (0, import_agents.waitForAbort)(this.abortSignal);
|
|
217
219
|
try {
|
|
218
220
|
while (!this.closed) {
|
|
219
|
-
const result = await Promise.race([
|
|
220
|
-
this.input.next(),
|
|
221
|
-
(0, import_agents.waitForAbort)(this.abortController.signal)
|
|
222
|
-
]);
|
|
221
|
+
const result = await Promise.race([this.input.next(), abortPromise]);
|
|
223
222
|
if (result === void 0) return;
|
|
224
223
|
if (result.done) {
|
|
225
224
|
break;
|
|
@@ -249,6 +248,14 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
249
248
|
}
|
|
250
249
|
};
|
|
251
250
|
const listenTask = import_agents.Task.from(async (controller) => {
|
|
251
|
+
const putMessage = (message) => {
|
|
252
|
+
if (!this.queue.closed) {
|
|
253
|
+
try {
|
|
254
|
+
this.queue.put(message);
|
|
255
|
+
} catch (e) {
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
};
|
|
252
259
|
const listenMessage = new Promise((resolve, reject) => {
|
|
253
260
|
ws.on("message", (msg) => {
|
|
254
261
|
try {
|
|
@@ -257,12 +264,7 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
257
264
|
case "SpeechStarted": {
|
|
258
265
|
if (this.#speaking) return;
|
|
259
266
|
this.#speaking = true;
|
|
260
|
-
|
|
261
|
-
try {
|
|
262
|
-
this.queue.put({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
|
|
263
|
-
} catch (e) {
|
|
264
|
-
}
|
|
265
|
-
}
|
|
267
|
+
putMessage({ type: import_agents.stt.SpeechEventType.START_OF_SPEECH });
|
|
266
268
|
break;
|
|
267
269
|
}
|
|
268
270
|
// see this page:
|
|
@@ -278,17 +280,17 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
278
280
|
if (alternatives[0] && alternatives[0].text) {
|
|
279
281
|
if (!this.#speaking) {
|
|
280
282
|
this.#speaking = true;
|
|
281
|
-
|
|
283
|
+
putMessage({
|
|
282
284
|
type: import_agents.stt.SpeechEventType.START_OF_SPEECH
|
|
283
285
|
});
|
|
284
286
|
}
|
|
285
287
|
if (isFinal) {
|
|
286
|
-
|
|
288
|
+
putMessage({
|
|
287
289
|
type: import_agents.stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
288
290
|
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
289
291
|
});
|
|
290
292
|
} else {
|
|
291
|
-
|
|
293
|
+
putMessage({
|
|
292
294
|
type: import_agents.stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
293
295
|
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
294
296
|
});
|
|
@@ -296,7 +298,7 @@ class SpeechStream extends import_agents.stt.SpeechStream {
|
|
|
296
298
|
}
|
|
297
299
|
if (isEndpoint && this.#speaking) {
|
|
298
300
|
this.#speaking = false;
|
|
299
|
-
|
|
301
|
+
putMessage({ type: import_agents.stt.SpeechEventType.END_OF_SPEECH });
|
|
300
302
|
}
|
|
301
303
|
break;
|
|
302
304
|
}
|
package/dist/stt.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? 
defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n 
protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is 
closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n 
this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n if (!this.queue.closed) {\n try {\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n } catch (e) {\n // ignore\n }\n }\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. 
It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? 
alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBASO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,
EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EA
AE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,gBAChB,4BAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,sBAAI;AACF,yBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D,SAAS,GAAG;AAAA,kBAEZ;AAAA,gBACF;AACA;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3
C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? 
defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n 
{ duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } 
else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel 
count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. 
It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? 
alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAUO;AAEP,gBAA0B;AAC1B,mBAAkC;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EAC/B;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,kBAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,cAAU,mBAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,qBAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,gCAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,
MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,oBAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,qBAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,Y
AAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,mBAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,YAAQ,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,mBAAe,4BAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,mBAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,kBAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,s
BAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,kBAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,kBAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,mBAAe,4BAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,kBAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
package/dist/stt.d.cts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type AudioBuffer, stt } from '@livekit/agents';
|
|
1
|
+
import { type APIConnectOptions, type AudioBuffer, stt } from '@livekit/agents';
|
|
2
2
|
import type { STTLanguages, STTModels } from './models.js';
|
|
3
3
|
export interface STTOptions {
|
|
4
4
|
apiKey?: string;
|
|
@@ -19,6 +19,7 @@ export interface STTOptions {
|
|
|
19
19
|
dictation: boolean;
|
|
20
20
|
diarize: boolean;
|
|
21
21
|
numerals: boolean;
|
|
22
|
+
mipOptOut: boolean;
|
|
22
23
|
}
|
|
23
24
|
export declare class STT extends stt.STT {
|
|
24
25
|
#private;
|
|
@@ -27,14 +28,15 @@ export declare class STT extends stt.STT {
|
|
|
27
28
|
constructor(opts?: Partial<STTOptions>);
|
|
28
29
|
_recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
|
|
29
30
|
updateOptions(opts: Partial<STTOptions>): void;
|
|
30
|
-
stream(
|
|
31
|
+
stream(options?: {
|
|
32
|
+
connOptions?: APIConnectOptions;
|
|
33
|
+
}): SpeechStream;
|
|
31
34
|
close(): Promise<void>;
|
|
32
35
|
}
|
|
33
36
|
export declare class SpeechStream extends stt.SpeechStream {
|
|
34
37
|
#private;
|
|
35
|
-
private abortController;
|
|
36
38
|
label: string;
|
|
37
|
-
constructor(stt: STT, opts: STTOptions,
|
|
39
|
+
constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions);
|
|
38
40
|
protected run(): Promise<void>;
|
|
39
41
|
updateOptions(opts: Partial<STTOptions>): void;
|
|
40
42
|
private onAudioDurationReport;
|
package/dist/stt.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type AudioBuffer, stt } from '@livekit/agents';
|
|
1
|
+
import { type APIConnectOptions, type AudioBuffer, stt } from '@livekit/agents';
|
|
2
2
|
import type { STTLanguages, STTModels } from './models.js';
|
|
3
3
|
export interface STTOptions {
|
|
4
4
|
apiKey?: string;
|
|
@@ -19,6 +19,7 @@ export interface STTOptions {
|
|
|
19
19
|
dictation: boolean;
|
|
20
20
|
diarize: boolean;
|
|
21
21
|
numerals: boolean;
|
|
22
|
+
mipOptOut: boolean;
|
|
22
23
|
}
|
|
23
24
|
export declare class STT extends stt.STT {
|
|
24
25
|
#private;
|
|
@@ -27,14 +28,15 @@ export declare class STT extends stt.STT {
|
|
|
27
28
|
constructor(opts?: Partial<STTOptions>);
|
|
28
29
|
_recognize(_: AudioBuffer): Promise<stt.SpeechEvent>;
|
|
29
30
|
updateOptions(opts: Partial<STTOptions>): void;
|
|
30
|
-
stream(
|
|
31
|
+
stream(options?: {
|
|
32
|
+
connOptions?: APIConnectOptions;
|
|
33
|
+
}): SpeechStream;
|
|
31
34
|
close(): Promise<void>;
|
|
32
35
|
}
|
|
33
36
|
export declare class SpeechStream extends stt.SpeechStream {
|
|
34
37
|
#private;
|
|
35
|
-
private abortController;
|
|
36
38
|
label: string;
|
|
37
|
-
constructor(stt: STT, opts: STTOptions,
|
|
39
|
+
constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions);
|
|
38
40
|
protected run(): Promise<void>;
|
|
39
41
|
updateOptions(opts: Partial<STTOptions>): void;
|
|
40
42
|
private onAudioDurationReport;
|
package/dist/stt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../src/stt.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EACtB,KAAK,WAAW,EAMhB,GAAG,EAEJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAI3D,MAAM,WAAW,UAAU;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;IACjC,cAAc,EAAE,OAAO,CAAC;IACxB,cAAc,EAAE,OAAO,CAAC;IACxB,SAAS,EAAE,OAAO,CAAC;IACnB,KAAK,EAAE,SAAS,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,OAAO,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC7B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,eAAe,EAAE,OAAO,CAAC;IACzB,SAAS,EAAE,OAAO,CAAC;IACnB,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,OAAO,CAAC;CACpB;AAwBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;;IAG9B,KAAK,SAAkB;IACvB,OAAO,CAAC,eAAe,CAAyB;gBAEpC,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAuCnD,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;IAI1D,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IAIvC,MAAM,CAAC,OAAO,CAAC,EAAE;QAAE,WAAW,CAAC,EAAE,iBAAiB,CAAA;KAAE,GAAG,YAAY;IAI7D,KAAK;CAGZ;AAED,qBAAa,YAAa,SAAQ,GAAG,CAAC,YAAY;;IAQhD,KAAK,SAA2B;gBAEpB,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,EAAE,iBAAiB;cAWvD,GAAG;IA0EnB,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,UAAU,CAAC;IA0LvC,OAAO,CAAC,qBAAqB;CAU9B"}
|
package/dist/stt.js
CHANGED
|
@@ -28,7 +28,8 @@ const defaultSTTOptions = {
|
|
|
28
28
|
profanityFilter: false,
|
|
29
29
|
dictation: false,
|
|
30
30
|
diarize: false,
|
|
31
|
-
numerals: false
|
|
31
|
+
numerals: false,
|
|
32
|
+
mipOptOut: false
|
|
32
33
|
};
|
|
33
34
|
class STT extends stt.STT {
|
|
34
35
|
#opts;
|
|
@@ -73,25 +74,14 @@ class STT extends stt.STT {
|
|
|
73
74
|
updateOptions(opts) {
|
|
74
75
|
this.#opts = { ...this.#opts, ...opts };
|
|
75
76
|
}
|
|
76
|
-
stream() {
|
|
77
|
-
return new SpeechStream(this, this.#opts,
|
|
77
|
+
stream(options) {
|
|
78
|
+
return new SpeechStream(this, this.#opts, options == null ? void 0 : options.connOptions);
|
|
78
79
|
}
|
|
79
80
|
async close() {
|
|
80
81
|
this.abortController.abort();
|
|
81
82
|
}
|
|
82
83
|
}
|
|
83
84
|
class SpeechStream extends stt.SpeechStream {
|
|
84
|
-
constructor(stt2, opts, abortController) {
|
|
85
|
-
super(stt2, opts.sampleRate);
|
|
86
|
-
this.abortController = abortController;
|
|
87
|
-
this.#opts = opts;
|
|
88
|
-
this.closed = false;
|
|
89
|
-
this.#audioEnergyFilter = new AudioEnergyFilter();
|
|
90
|
-
this.#audioDurationCollector = new PeriodicCollector(
|
|
91
|
-
(duration) => this.onAudioDurationReport(duration),
|
|
92
|
-
{ duration: 5 }
|
|
93
|
-
);
|
|
94
|
-
}
|
|
95
85
|
#opts;
|
|
96
86
|
#audioEnergyFilter;
|
|
97
87
|
#logger = log();
|
|
@@ -100,6 +90,16 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
100
90
|
#requestId = "";
|
|
101
91
|
#audioDurationCollector;
|
|
102
92
|
label = "deepgram.SpeechStream";
|
|
93
|
+
constructor(stt2, opts, connOptions) {
|
|
94
|
+
super(stt2, opts.sampleRate, connOptions);
|
|
95
|
+
this.#opts = opts;
|
|
96
|
+
this.closed = false;
|
|
97
|
+
this.#audioEnergyFilter = new AudioEnergyFilter();
|
|
98
|
+
this.#audioDurationCollector = new PeriodicCollector(
|
|
99
|
+
(duration) => this.onAudioDurationReport(duration),
|
|
100
|
+
{ duration: 5 }
|
|
101
|
+
);
|
|
102
|
+
}
|
|
103
103
|
async run() {
|
|
104
104
|
const maxRetry = 32;
|
|
105
105
|
let retries = 0;
|
|
@@ -124,7 +124,8 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
124
124
|
keywords: this.#opts.keywords.map((x) => x.join(":")),
|
|
125
125
|
keyterm: this.#opts.keyterm,
|
|
126
126
|
profanity_filter: this.#opts.profanityFilter,
|
|
127
|
-
language: this.#opts.language
|
|
127
|
+
language: this.#opts.language,
|
|
128
|
+
mip_opt_out: this.#opts.mipOptOut
|
|
128
129
|
};
|
|
129
130
|
Object.entries(params).forEach(([k, v]) => {
|
|
130
131
|
if (v !== void 0) {
|
|
@@ -198,12 +199,10 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
198
199
|
this.#opts.numChannels,
|
|
199
200
|
samples100Ms
|
|
200
201
|
);
|
|
202
|
+
const abortPromise = waitForAbort(this.abortSignal);
|
|
201
203
|
try {
|
|
202
204
|
while (!this.closed) {
|
|
203
|
-
const result = await Promise.race([
|
|
204
|
-
this.input.next(),
|
|
205
|
-
waitForAbort(this.abortController.signal)
|
|
206
|
-
]);
|
|
205
|
+
const result = await Promise.race([this.input.next(), abortPromise]);
|
|
207
206
|
if (result === void 0) return;
|
|
208
207
|
if (result.done) {
|
|
209
208
|
break;
|
|
@@ -233,6 +232,14 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
233
232
|
}
|
|
234
233
|
};
|
|
235
234
|
const listenTask = Task.from(async (controller) => {
|
|
235
|
+
const putMessage = (message) => {
|
|
236
|
+
if (!this.queue.closed) {
|
|
237
|
+
try {
|
|
238
|
+
this.queue.put(message);
|
|
239
|
+
} catch (e) {
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
};
|
|
236
243
|
const listenMessage = new Promise((resolve, reject) => {
|
|
237
244
|
ws.on("message", (msg) => {
|
|
238
245
|
try {
|
|
@@ -241,12 +248,7 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
241
248
|
case "SpeechStarted": {
|
|
242
249
|
if (this.#speaking) return;
|
|
243
250
|
this.#speaking = true;
|
|
244
|
-
|
|
245
|
-
try {
|
|
246
|
-
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
247
|
-
} catch (e) {
|
|
248
|
-
}
|
|
249
|
-
}
|
|
251
|
+
putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
250
252
|
break;
|
|
251
253
|
}
|
|
252
254
|
// see this page:
|
|
@@ -262,17 +264,17 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
262
264
|
if (alternatives[0] && alternatives[0].text) {
|
|
263
265
|
if (!this.#speaking) {
|
|
264
266
|
this.#speaking = true;
|
|
265
|
-
|
|
267
|
+
putMessage({
|
|
266
268
|
type: stt.SpeechEventType.START_OF_SPEECH
|
|
267
269
|
});
|
|
268
270
|
}
|
|
269
271
|
if (isFinal) {
|
|
270
|
-
|
|
272
|
+
putMessage({
|
|
271
273
|
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
272
274
|
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
273
275
|
});
|
|
274
276
|
} else {
|
|
275
|
-
|
|
277
|
+
putMessage({
|
|
276
278
|
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
277
279
|
alternatives: [alternatives[0], ...alternatives.slice(1)]
|
|
278
280
|
});
|
|
@@ -280,7 +282,7 @@ class SpeechStream extends stt.SpeechStream {
|
|
|
280
282
|
}
|
|
281
283
|
if (isEndpoint && this.#speaking) {
|
|
282
284
|
this.#speaking = false;
|
|
283
|
-
|
|
285
|
+
putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });
|
|
284
286
|
}
|
|
285
287
|
break;
|
|
286
288
|
}
|
package/dist/stt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? 
defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(): SpeechStream {\n return new SpeechStream(this, this.#opts, this.abortController);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(\n stt: STT,\n opts: STTOptions,\n private abortController: AbortController,\n ) {\n super(stt, opts.sampleRate);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n { duration: 5.0 },\n );\n }\n\n 
protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } else {\n this.#logger.warn(\n `Deepgram disconnected, connection is 
closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n try {\n while (!this.closed) {\n const result = await Promise.race([\n this.input.next(),\n waitForAbort(this.abortController.signal),\n ]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n 
this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n if (!this.queue.closed) {\n try {\n this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });\n } catch (e) {\n // ignore\n }\n }\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. 
It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n this.queue.put({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n this.queue.put({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n this.queue.put({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n this.queue.put({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? 
alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAuBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AACZ;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,SAAuB;AACrB,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,KAAK,eAAe;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EAUjD,YACEA,MACA,MACQ,iBACR;AACA,UAAMA,MAAK,KAAK,UAAU;AAFlB;AAGR,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,CAAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAtBA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,
IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAiBR,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,MACvB;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAA
I;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAEA,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK;AAAA,YAChC,KAAK,MAAM,KAAK;AAAA,YAChB,aAAa,KAAK,gBAAgB,MAAM;AAAA,UAC1C,CAAC;AAED,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,oBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,sBAAI;AACF,yBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AAAA,kBAC9D,SAAS,GAAG;AAAA,kBAEZ;AAAA,gBACF;AACA;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;AACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,a
AAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,yBAAK,MAAM,IAAI;AAAA,sBACb,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,uBAAK,MAAM,IAAI,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBAC5D;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
|
1
|
+
{"version":3,"sources":["../src/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n type AudioBuffer,\n AudioByteStream,\n AudioEnergyFilter,\n Future,\n Task,\n log,\n stt,\n waitForAbort,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { WebSocket } from 'ws';\nimport { PeriodicCollector } from './_utils.js';\nimport type { STTLanguages, STTModels } from './models.js';\n\nconst API_BASE_URL_V1 = 'wss://api.deepgram.com/v1/listen';\n\nexport interface STTOptions {\n apiKey?: string;\n language?: STTLanguages | string;\n detectLanguage: boolean;\n interimResults: boolean;\n punctuate: boolean;\n model: STTModels;\n smartFormat: boolean;\n noDelay: boolean;\n endpointing: number;\n fillerWords: boolean;\n sampleRate: number;\n numChannels: number;\n keywords: [string, number][];\n keyterm: string[];\n profanityFilter: boolean;\n dictation: boolean;\n diarize: boolean;\n numerals: boolean;\n mipOptOut: boolean;\n}\n\nconst defaultSTTOptions: STTOptions = {\n apiKey: process.env.DEEPGRAM_API_KEY,\n language: 'en-US',\n detectLanguage: false,\n interimResults: true,\n punctuate: true,\n model: 'nova-3',\n smartFormat: true,\n noDelay: true,\n endpointing: 25,\n fillerWords: false,\n sampleRate: 16000,\n numChannels: 1,\n keywords: [],\n keyterm: [],\n profanityFilter: false,\n dictation: false,\n diarize: false,\n numerals: false,\n mipOptOut: false,\n};\n\nexport class STT extends stt.STT {\n #opts: STTOptions;\n #logger = log();\n label = 'deepgram.STT';\n private abortController = new AbortController();\n\n constructor(opts: Partial<STTOptions> = defaultSTTOptions) {\n super({\n streaming: true,\n interimResults: opts.interimResults ?? 
defaultSTTOptions.interimResults,\n });\n if (opts.apiKey === undefined && defaultSTTOptions.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n\n this.#opts = { ...defaultSTTOptions, ...opts };\n\n if (this.#opts.detectLanguage) {\n this.#opts.language = undefined;\n } else if (\n this.#opts.language &&\n !['en-US', 'en'].includes(this.#opts.language) &&\n [\n 'nova-2-meeting',\n 'nova-2-phonecall',\n 'nova-2-finance',\n 'nova-2-conversationalai',\n 'nova-2-voicemail',\n 'nova-2-video',\n 'nova-2-medical',\n 'nova-2-drivethru',\n 'nova-2-automotive',\n 'nova-3-general',\n ].includes(this.#opts.model)\n ) {\n this.#logger.warn(\n `${this.#opts.model} does not support language ${this.#opts.language}, falling back to nova-2-general`,\n );\n this.#opts.model = 'nova-2-general';\n }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n async _recognize(_: AudioBuffer): Promise<stt.SpeechEvent> {\n throw new Error('Recognize is not supported on Deepgram STT');\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n }\n\n stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {\n return new SpeechStream(this, this.#opts, options?.connOptions);\n }\n\n async close() {\n this.abortController.abort();\n }\n}\n\nexport class SpeechStream extends stt.SpeechStream {\n #opts: STTOptions;\n #audioEnergyFilter: AudioEnergyFilter;\n #logger = log();\n #speaking = false;\n #resetWS = new Future();\n #requestId = '';\n #audioDurationCollector: PeriodicCollector<number>;\n label = 'deepgram.SpeechStream';\n\n constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {\n super(stt, opts.sampleRate, connOptions);\n this.#opts = opts;\n this.closed = false;\n this.#audioEnergyFilter = new AudioEnergyFilter();\n this.#audioDurationCollector = new PeriodicCollector(\n (duration) => this.onAudioDurationReport(duration),\n 
{ duration: 5.0 },\n );\n }\n\n protected async run() {\n const maxRetry = 32;\n let retries = 0;\n let ws: WebSocket;\n\n while (!this.input.closed && !this.closed) {\n const streamURL = new URL(API_BASE_URL_V1);\n const params = {\n model: this.#opts.model,\n punctuate: this.#opts.punctuate,\n smart_format: this.#opts.smartFormat,\n dictation: this.#opts.dictation,\n diarize: this.#opts.diarize,\n numerals: this.#opts.numerals,\n no_delay: this.#opts.noDelay,\n interim_results: this.#opts.interimResults,\n encoding: 'linear16',\n vad_events: true,\n sample_rate: this.#opts.sampleRate,\n channels: this.#opts.numChannels,\n endpointing: this.#opts.endpointing || false,\n filler_words: this.#opts.fillerWords,\n keywords: this.#opts.keywords.map((x) => x.join(':')),\n keyterm: this.#opts.keyterm,\n profanity_filter: this.#opts.profanityFilter,\n language: this.#opts.language,\n mip_opt_out: this.#opts.mipOptOut,\n };\n Object.entries(params).forEach(([k, v]) => {\n if (v !== undefined) {\n if (typeof v === 'string' || typeof v === 'number' || typeof v === 'boolean') {\n streamURL.searchParams.append(k, encodeURIComponent(v));\n } else {\n v.forEach((x) => streamURL.searchParams.append(k, encodeURIComponent(x)));\n }\n }\n });\n\n ws = new WebSocket(streamURL, {\n headers: { Authorization: `Token ${this.#opts.apiKey}` },\n });\n\n try {\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n await this.#runWS(ws);\n } catch (e) {\n if (!this.closed && !this.input.closed) {\n if (retries >= maxRetry) {\n throw new Error(`failed to connect to Deepgram after ${retries} attempts: ${e}`);\n }\n\n const delay = Math.min(retries * 5, 10);\n retries++;\n\n this.#logger.warn(\n `failed to connect to Deepgram, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,\n );\n await new Promise((resolve) => setTimeout(resolve, delay * 1000));\n } 
else {\n this.#logger.warn(\n `Deepgram disconnected, connection is closed: ${e} (inputClosed: ${this.input.closed}, isClosed: ${this.closed})`,\n );\n }\n }\n }\n\n this.closed = true;\n }\n\n updateOptions(opts: Partial<STTOptions>) {\n this.#opts = { ...this.#opts, ...opts };\n this.#resetWS.resolve();\n }\n\n async #runWS(ws: WebSocket) {\n this.#resetWS = new Future();\n let closing = false;\n\n const keepalive = setInterval(() => {\n try {\n ws.send(JSON.stringify({ type: 'KeepAlive' }));\n } catch {\n clearInterval(keepalive);\n return;\n }\n }, 5000);\n\n // gets cancelled also when sendTask is complete\n const wsMonitor = Task.from(async (controller) => {\n const closed = new Promise<void>(async (_, reject) => {\n ws.once('close', (code, reason) => {\n if (!closing) {\n this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);\n reject(new Error('WebSocket closed'));\n }\n });\n });\n\n await Promise.race([closed, waitForAbort(controller.signal)]);\n });\n\n const sendTask = async () => {\n const samples100Ms = Math.floor(this.#opts.sampleRate / 10);\n const stream = new AudioByteStream(\n this.#opts.sampleRate,\n this.#opts.numChannels,\n samples100Ms,\n );\n\n // waitForAbort internally sets up an abort listener on the abort signal\n // we need to put it outside loop to avoid constant re-registration of the listener\n const abortPromise = waitForAbort(this.abortSignal);\n\n try {\n while (!this.closed) {\n const result = await Promise.race([this.input.next(), abortPromise]);\n\n if (result === undefined) return; // aborted\n if (result.done) {\n break;\n }\n\n const data = result.value;\n\n let frames: AudioFrame[];\n if (data === SpeechStream.FLUSH_SENTINEL) {\n frames = stream.flush();\n this.#audioDurationCollector.flush();\n } else if (\n data.sampleRate === this.#opts.sampleRate ||\n data.channels === this.#opts.numChannels\n ) {\n frames = stream.write(data.data.buffer as ArrayBuffer);\n } else {\n throw new Error(`sample rate or channel 
count of frame does not match`);\n }\n\n for await (const frame of frames) {\n if (this.#audioEnergyFilter.pushFrame(frame)) {\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n this.#audioDurationCollector.push(frameDuration);\n ws.send(frame.data.buffer);\n }\n }\n }\n } finally {\n closing = true;\n ws.send(JSON.stringify({ type: 'CloseStream' }));\n wsMonitor.cancel();\n }\n };\n\n const listenTask = Task.from(async (controller) => {\n const putMessage = (message: stt.SpeechEvent) => {\n if (!this.queue.closed) {\n try {\n this.queue.put(message);\n } catch (e) {\n // ignore\n }\n }\n };\n\n const listenMessage = new Promise<void>((resolve, reject) => {\n ws.on('message', (msg) => {\n try {\n const json = JSON.parse(msg.toString());\n switch (json['type']) {\n case 'SpeechStarted': {\n // This is a normal case. Deepgram's SpeechStarted events\n // are not correlated with speech_final or utterance end.\n // It's possible that we receive two in a row without an endpoint\n // It's also possible we receive a transcript without a SpeechStarted event.\n if (this.#speaking) return;\n this.#speaking = true;\n putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });\n break;\n }\n // see this page:\n // https://developers.deepgram.com/docs/understand-endpointing-interim-results#using-endpointing-speech_final\n // for more information about the different types of events\n case 'Results': {\n const metadata = json['metadata'];\n const requestId = metadata['request_id'];\n const isFinal = json['is_final'];\n const isEndpoint = json['speech_final'];\n this.#requestId = requestId;\n\n const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json);\n\n // If, for some reason, we didn't get a SpeechStarted event but we got\n // a transcript with text, we should start speaking. 
It's rare but has\n // been observed.\n if (alternatives[0] && alternatives[0].text) {\n if (!this.#speaking) {\n this.#speaking = true;\n putMessage({\n type: stt.SpeechEventType.START_OF_SPEECH,\n });\n }\n\n if (isFinal) {\n putMessage({\n type: stt.SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n } else {\n putMessage({\n type: stt.SpeechEventType.INTERIM_TRANSCRIPT,\n alternatives: [alternatives[0], ...alternatives.slice(1)],\n });\n }\n }\n\n // if we receive an endpoint, only end the speech if\n // we either had a SpeechStarted event or we have a seen\n // a non-empty transcript (deepgram doesn't have a SpeechEnded event)\n if (isEndpoint && this.#speaking) {\n this.#speaking = false;\n putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });\n }\n\n break;\n }\n case 'Metadata': {\n break;\n }\n default: {\n this.#logger.child({ msg: json }).warn('received unexpected message from Deepgram');\n break;\n }\n }\n\n if (this.closed || closing) {\n resolve();\n }\n } catch (err) {\n this.#logger.error(`STT: Error processing message: ${msg}`);\n reject(err);\n }\n });\n });\n\n await Promise.race([listenMessage, waitForAbort(controller.signal)]);\n }, this.abortController);\n\n await Promise.race([\n this.#resetWS.await,\n Promise.all([sendTask(), listenTask.result, wsMonitor]),\n ]);\n closing = true;\n ws.close();\n clearInterval(keepalive);\n }\n\n private onAudioDurationReport(duration: number) {\n const usageEvent: stt.SpeechEvent = {\n type: stt.SpeechEventType.RECOGNITION_USAGE,\n requestId: this.#requestId,\n recognitionUsage: {\n audioDuration: duration,\n },\n };\n this.queue.put(usageEvent);\n }\n}\n\nconst liveTranscriptionToSpeechData = (\n language: STTLanguages | string,\n data: { [id: string]: any },\n): stt.SpeechData[] => {\n const alts: any[] = data['channel']['alternatives'];\n\n return alts.map((alt) => ({\n language,\n startTime: alt['words'].length ? 
alt['words'][0]['start'] : 0,\n endTime: alt['words'].length ? alt['words'][alt['words'].length - 1]['end'] : 0,\n confidence: alt['confidence'],\n text: alt['transcript'],\n }));\n};\n"],"mappings":"AAGA;AAAA,EAGE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,iBAAiB;AAC1B,SAAS,yBAAyB;AAGlC,MAAM,kBAAkB;AAwBxB,MAAM,oBAAgC;AAAA,EACpC,QAAQ,QAAQ,IAAI;AAAA,EACpB,UAAU;AAAA,EACV,gBAAgB;AAAA,EAChB,gBAAgB;AAAA,EAChB,WAAW;AAAA,EACX,OAAO;AAAA,EACP,aAAa;AAAA,EACb,SAAS;AAAA,EACT,aAAa;AAAA,EACb,aAAa;AAAA,EACb,YAAY;AAAA,EACZ,aAAa;AAAA,EACb,UAAU,CAAC;AAAA,EACX,SAAS,CAAC;AAAA,EACV,iBAAiB;AAAA,EACjB,WAAW;AAAA,EACX,SAAS;AAAA,EACT,UAAU;AAAA,EACV,WAAW;AACb;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EAC/B;AAAA,EACA,UAAU,IAAI;AAAA,EACd,QAAQ;AAAA,EACA,kBAAkB,IAAI,gBAAgB;AAAA,EAE9C,YAAY,OAA4B,mBAAmB;AACzD,UAAM;AAAA,MACJ,WAAW;AAAA,MACX,gBAAgB,KAAK,kBAAkB,kBAAkB;AAAA,IAC3D,CAAC;AACD,QAAI,KAAK,WAAW,UAAa,kBAAkB,WAAW,QAAW;AACvE,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAEA,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,KAAK,MAAM,gBAAgB;AAC7B,WAAK,MAAM,WAAW;AAAA,IACxB,WACE,KAAK,MAAM,YACX,CAAC,CAAC,SAAS,IAAI,EAAE,SAAS,KAAK,MAAM,QAAQ,KAC7C;AAAA,MACE;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE,SAAS,KAAK,MAAM,KAAK,GAC3B;AACA,WAAK,QAAQ;AAAA,QACX,GAAG,KAAK,MAAM,KAAK,8BAA8B,KAAK,MAAM,QAAQ;AAAA,MACtE;AACA,WAAK,MAAM,QAAQ;AAAA,IACrB;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,GAA0C;AACzD,UAAM,IAAI,MAAM,4CAA4C;AAAA,EAC9D;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AAAA,EACxC;AAAA,EAEA,OAAO,SAA6D;AAClE,WAAO,IAAI,aAAa,MAAM,KAAK,OAAO,mCAAS,WAAW;AAAA,EAChE;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AACF;AAEO,MAAM,qBAAqB,IAAI,aAAa;AAAA,EACjD;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EACd,YAAY;AAAA,EACZ,WAAW,IAAI,OAAO;AAAA,EACtB,aAAa;AAAA,EACb;AAAA,EACA,QAAQ;AAAA,EAER,YAAYA,MAAU,MAAkB,aAAiC;AACvE,UAAMA,MAAK,KAAK,YAAY,WAAW;AACvC,SAAK,QAAQ;AACb,SAAK,SAAS;AACd,SAAK,qBAAqB,IAAI,kBAAkB;AAChD,SAAK,0BAA0B,IAAI;AAAA,MACjC,C
AAC,aAAa,KAAK,sBAAsB,QAAQ;AAAA,MACjD,EAAE,UAAU,EAAI;AAAA,IAClB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,WAAW;AACjB,QAAI,UAAU;AACd,QAAI;AAEJ,WAAO,CAAC,KAAK,MAAM,UAAU,CAAC,KAAK,QAAQ;AACzC,YAAM,YAAY,IAAI,IAAI,eAAe;AACzC,YAAM,SAAS;AAAA,QACb,OAAO,KAAK,MAAM;AAAA,QAClB,WAAW,KAAK,MAAM;AAAA,QACtB,cAAc,KAAK,MAAM;AAAA,QACzB,WAAW,KAAK,MAAM;AAAA,QACtB,SAAS,KAAK,MAAM;AAAA,QACpB,UAAU,KAAK,MAAM;AAAA,QACrB,UAAU,KAAK,MAAM;AAAA,QACrB,iBAAiB,KAAK,MAAM;AAAA,QAC5B,UAAU;AAAA,QACV,YAAY;AAAA,QACZ,aAAa,KAAK,MAAM;AAAA,QACxB,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM,eAAe;AAAA,QACvC,cAAc,KAAK,MAAM;AAAA,QACzB,UAAU,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,GAAG,CAAC;AAAA,QACpD,SAAS,KAAK,MAAM;AAAA,QACpB,kBAAkB,KAAK,MAAM;AAAA,QAC7B,UAAU,KAAK,MAAM;AAAA,QACrB,aAAa,KAAK,MAAM;AAAA,MAC1B;AACA,aAAO,QAAQ,MAAM,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,MAAM;AACzC,YAAI,MAAM,QAAW;AACnB,cAAI,OAAO,MAAM,YAAY,OAAO,MAAM,YAAY,OAAO,MAAM,WAAW;AAC5E,sBAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC;AAAA,UACxD,OAAO;AACL,cAAE,QAAQ,CAAC,MAAM,UAAU,aAAa,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC;AAAA,UAC1E;AAAA,QACF;AAAA,MACF,CAAC;AAED,WAAK,IAAI,UAAU,WAAW;AAAA,QAC5B,SAAS,EAAE,eAAe,SAAS,KAAK,MAAM,MAAM,GAAG;AAAA,MACzD,CAAC;AAED,UAAI;AACF,cAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,aAAG,GAAG,QAAQ,OAAO;AACrB,aAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,aAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,QAC/D,CAAC;AAED,cAAM,KAAK,OAAO,EAAE;AAAA,MACtB,SAAS,GAAG;AACV,YAAI,CAAC,KAAK,UAAU,CAAC,KAAK,MAAM,QAAQ;AACtC,cAAI,WAAW,UAAU;AACvB,kBAAM,IAAI,MAAM,uCAAuC,OAAO,cAAc,CAAC,EAAE;AAAA,UACjF;AAEA,gBAAM,QAAQ,KAAK,IAAI,UAAU,GAAG,EAAE;AACtC;AAEA,eAAK,QAAQ;AAAA,YACX,8CAA8C,KAAK,aAAa,CAAC,KAAK,OAAO,IAAI,QAAQ;AAAA,UAC3F;AACA,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,QAAQ,GAAI,CAAC;AAAA,QAClE,OAAO;AACL,eAAK,QAAQ;AAAA,YACX,gDAAgD,CAAC,kBAAkB,KAAK,MAAM,MAAM,eAAe,KAAK,MAAM;AAAA,UAChH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,cAAc,MAA2B;AACvC,SAAK,QAAQ,EAAE,GAAG,KAAK,OAAO,GAAG,KAAK;AACtC,SAAK,SAAS,QAAQ;AAAA,EACxB;AAAA,EAEA,MAAM,OAAO,IAAe;AAC1B,SAAK,WAAW,IAAI,OAAO;AAC
3B,QAAI,UAAU;AAEd,UAAM,YAAY,YAAY,MAAM;AAClC,UAAI;AACF,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC,CAAC;AAAA,MAC/C,QAAQ;AACN,sBAAc,SAAS;AACvB;AAAA,MACF;AAAA,IACF,GAAG,GAAI;AAGP,UAAM,YAAY,KAAK,KAAK,OAAO,eAAe;AAChD,YAAM,SAAS,IAAI,QAAc,OAAO,GAAG,WAAW;AACpD,WAAG,KAAK,SAAS,CAAC,MAAM,WAAW;AACjC,cAAI,CAAC,SAAS;AACZ,iBAAK,QAAQ,MAAM,8BAA8B,IAAI,KAAK,MAAM,EAAE;AAClE,mBAAO,IAAI,MAAM,kBAAkB,CAAC;AAAA,UACtC;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,QAAQ,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IAC9D,CAAC;AAED,UAAM,WAAW,YAAY;AAC3B,YAAM,eAAe,KAAK,MAAM,KAAK,MAAM,aAAa,EAAE;AAC1D,YAAM,SAAS,IAAI;AAAA,QACjB,KAAK,MAAM;AAAA,QACX,KAAK,MAAM;AAAA,QACX;AAAA,MACF;AAIA,YAAM,eAAe,aAAa,KAAK,WAAW;AAElD,UAAI;AACF,eAAO,CAAC,KAAK,QAAQ;AACnB,gBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,KAAK,MAAM,KAAK,GAAG,YAAY,CAAC;AAEnE,cAAI,WAAW,OAAW;AAC1B,cAAI,OAAO,MAAM;AACf;AAAA,UACF;AAEA,gBAAM,OAAO,OAAO;AAEpB,cAAI;AACJ,cAAI,SAAS,aAAa,gBAAgB;AACxC,qBAAS,OAAO,MAAM;AACtB,iBAAK,wBAAwB,MAAM;AAAA,UACrC,WACE,KAAK,eAAe,KAAK,MAAM,cAC/B,KAAK,aAAa,KAAK,MAAM,aAC7B;AACA,qBAAS,OAAO,MAAM,KAAK,KAAK,MAAqB;AAAA,UACvD,OAAO;AACL,kBAAM,IAAI,MAAM,sDAAsD;AAAA,UACxE;AAEA,2BAAiB,SAAS,QAAQ;AAChC,gBAAI,KAAK,mBAAmB,UAAU,KAAK,GAAG;AAC5C,oBAAM,gBAAgB,MAAM,oBAAoB,MAAM;AACtD,mBAAK,wBAAwB,KAAK,aAAa;AAC/C,iBAAG,KAAK,MAAM,KAAK,MAAM;AAAA,YAC3B;AAAA,UACF;AAAA,QACF;AAAA,MACF,UAAE;AACA,kBAAU;AACV,WAAG,KAAK,KAAK,UAAU,EAAE,MAAM,cAAc,CAAC,CAAC;AAC/C,kBAAU,OAAO;AAAA,MACnB;AAAA,IACF;AAEA,UAAM,aAAa,KAAK,KAAK,OAAO,eAAe;AACjD,YAAM,aAAa,CAAC,YAA6B;AAC/C,YAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,cAAI;AACF,iBAAK,MAAM,IAAI,OAAO;AAAA,UACxB,SAAS,GAAG;AAAA,UAEZ;AAAA,QACF;AAAA,MACF;AAEA,YAAM,gBAAgB,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3D,WAAG,GAAG,WAAW,CAAC,QAAQ;AACxB,cAAI;AACF,kBAAM,OAAO,KAAK,MAAM,IAAI,SAAS,CAAC;AACtC,oBAAQ,KAAK,MAAM,GAAG;AAAA,cACpB,KAAK,iBAAiB;AAKpB,oBAAI,KAAK,UAAW;AACpB,qBAAK,YAAY;AACjB,2BAAW,EAAE,MAAM,IAAI,gBAAgB,gBAAgB,CAAC;AACxD;AAAA,cACF;AAAA;AAAA;AAAA;AAAA,cAIA,KAAK,WAAW;AACd,sBAAM,WAAW,KAAK,UAAU;AAChC,sBAAM,YAAY,SAAS,YAAY;AACvC,sBAAM,UAAU,KAAK,UAAU;AAC/B,sBAAM,aAAa,KAAK,cAAc;A
ACtC,qBAAK,aAAa;AAElB,sBAAM,eAAe,8BAA8B,KAAK,MAAM,UAAW,IAAI;AAK7E,oBAAI,aAAa,CAAC,KAAK,aAAa,CAAC,EAAE,MAAM;AAC3C,sBAAI,CAAC,KAAK,WAAW;AACnB,yBAAK,YAAY;AACjB,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,oBAC5B,CAAC;AAAA,kBACH;AAEA,sBAAI,SAAS;AACX,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH,OAAO;AACL,+BAAW;AAAA,sBACT,MAAM,IAAI,gBAAgB;AAAA,sBAC1B,cAAc,CAAC,aAAa,CAAC,GAAG,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,oBAC1D,CAAC;AAAA,kBACH;AAAA,gBACF;AAKA,oBAAI,cAAc,KAAK,WAAW;AAChC,uBAAK,YAAY;AACjB,6BAAW,EAAE,MAAM,IAAI,gBAAgB,cAAc,CAAC;AAAA,gBACxD;AAEA;AAAA,cACF;AAAA,cACA,KAAK,YAAY;AACf;AAAA,cACF;AAAA,cACA,SAAS;AACP,qBAAK,QAAQ,MAAM,EAAE,KAAK,KAAK,CAAC,EAAE,KAAK,2CAA2C;AAClF;AAAA,cACF;AAAA,YACF;AAEA,gBAAI,KAAK,UAAU,SAAS;AAC1B,sBAAQ;AAAA,YACV;AAAA,UACF,SAAS,KAAK;AACZ,iBAAK,QAAQ,MAAM,kCAAkC,GAAG,EAAE;AAC1D,mBAAO,GAAG;AAAA,UACZ;AAAA,QACF,CAAC;AAAA,MACH,CAAC;AAED,YAAM,QAAQ,KAAK,CAAC,eAAe,aAAa,WAAW,MAAM,CAAC,CAAC;AAAA,IACrE,GAAG,KAAK,eAAe;AAEvB,UAAM,QAAQ,KAAK;AAAA,MACjB,KAAK,SAAS;AAAA,MACd,QAAQ,IAAI,CAAC,SAAS,GAAG,WAAW,QAAQ,SAAS,CAAC;AAAA,IACxD,CAAC;AACD,cAAU;AACV,OAAG,MAAM;AACT,kBAAc,SAAS;AAAA,EACzB;AAAA,EAEQ,sBAAsB,UAAkB;AAC9C,UAAM,aAA8B;AAAA,MAClC,MAAM,IAAI,gBAAgB;AAAA,MAC1B,WAAW,KAAK;AAAA,MAChB,kBAAkB;AAAA,QAChB,eAAe;AAAA,MACjB;AAAA,IACF;AACA,SAAK,MAAM,IAAI,UAAU;AAAA,EAC3B;AACF;AAEA,MAAM,gCAAgC,CACpC,UACA,SACqB;AACrB,QAAM,OAAc,KAAK,SAAS,EAAE,cAAc;AAElD,SAAO,KAAK,IAAI,CAAC,SAAS;AAAA,IACxB;AAAA,IACA,WAAW,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,CAAC,EAAE,OAAO,IAAI;AAAA,IAC5D,SAAS,IAAI,OAAO,EAAE,SAAS,IAAI,OAAO,EAAE,IAAI,OAAO,EAAE,SAAS,CAAC,EAAE,KAAK,IAAI;AAAA,IAC9E,YAAY,IAAI,YAAY;AAAA,IAC5B,MAAM,IAAI,YAAY;AAAA,EACxB,EAAE;AACJ;","names":["stt"]}
|
package/dist/tts.cjs
CHANGED
|
@@ -60,8 +60,8 @@ class TTS extends import_agents.tts.TTS {
|
|
|
60
60
|
);
|
|
61
61
|
}
|
|
62
62
|
}
|
|
63
|
-
synthesize(text) {
|
|
64
|
-
return new ChunkedStream(this, text, this.opts);
|
|
63
|
+
synthesize(text, connOptions, abortSignal) {
|
|
64
|
+
return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
|
|
65
65
|
}
|
|
66
66
|
stream() {
|
|
67
67
|
return new SynthesizeStream(this, this.opts);
|
|
@@ -69,10 +69,11 @@ class TTS extends import_agents.tts.TTS {
|
|
|
69
69
|
}
|
|
70
70
|
class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
71
71
|
label = "deepgram.ChunkedStream";
|
|
72
|
+
#logger = (0, import_agents.log)();
|
|
72
73
|
opts;
|
|
73
74
|
text;
|
|
74
|
-
constructor(tts2, text, opts) {
|
|
75
|
-
super(text, tts2);
|
|
75
|
+
constructor(tts2, text, opts, connOptions, abortSignal) {
|
|
76
|
+
super(text, tts2, connOptions, abortSignal);
|
|
76
77
|
this.text = text;
|
|
77
78
|
this.opts = opts;
|
|
78
79
|
}
|
|
@@ -94,7 +95,8 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
94
95
|
headers: {
|
|
95
96
|
[AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey}`,
|
|
96
97
|
"Content-Type": "application/json"
|
|
97
|
-
}
|
|
98
|
+
},
|
|
99
|
+
signal: this.abortSignal
|
|
98
100
|
},
|
|
99
101
|
(res) => {
|
|
100
102
|
if (res.statusCode !== 200) {
|
|
@@ -116,7 +118,8 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
116
118
|
}
|
|
117
119
|
});
|
|
118
120
|
res.on("error", (err) => {
|
|
119
|
-
|
|
121
|
+
if (err.message === "aborted") return;
|
|
122
|
+
this.#logger.error({ err }, "Deepgram TTS response error");
|
|
120
123
|
});
|
|
121
124
|
res.on("close", () => {
|
|
122
125
|
for (const frame of bstream.flush()) {
|
|
@@ -137,8 +140,10 @@ class ChunkedStream extends import_agents.tts.ChunkedStream {
|
|
|
137
140
|
}
|
|
138
141
|
);
|
|
139
142
|
req.on("error", (err) => {
|
|
140
|
-
|
|
143
|
+
if (err.name === "AbortError") return;
|
|
144
|
+
this.#logger.error({ err }, "Deepgram TTS request error");
|
|
141
145
|
});
|
|
146
|
+
req.on("close", () => resolve());
|
|
142
147
|
req.write(JSON.stringify(json));
|
|
143
148
|
req.end();
|
|
144
149
|
});
|
package/dist/tts.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? 
defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n private opts: TTSOptions;\n private text: string;\n\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n reject(err);\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n 
final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = 
this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if 
(message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA0D;AAE1D,wBAAwB;AACxB,gBAAwC;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAzC9C;AA0CI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,IAAI;AAAA,EAChD;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,UAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,QACF;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,mBAAO,GAAG;AAAA,UACZ,CA
AC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,eAAO,GAAG;AAAA,MACZ,CAAC;AAED,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,oBAAU,QAAQ,GAAG,eAAe,oBAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,oBAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B
,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe
,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? 
defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n #logger = log();\n private opts: TTSOptions;\n private text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n signal: this.abortSignal,\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: 
false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Deepgram TTS response error');\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Deepgram TTS request error');\n });\n\n req.on('close', () => resolve());\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n 
clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame 
&& !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if (message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAOO;AAEP,wBAAwB;AACxB,gBAAwC;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,uBAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,kBAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAhD9C;AAiDI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,MAAM,aAAa,WAAW;AAAA,EAC1E;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,kBAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,cAAU,mBAAI;AAAA,EACN;AAAA,EACA;AAAA,EAER,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,UAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,UACA,QAAQ,KAAK;AAAA,QACf;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,Y
ACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,gBAAI,IAAI,YAAY,UAAW;AAC/B,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,UAC3D,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,YAAI,IAAI,SAAS,aAAc;AAC/B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,MAC1D,CAAC;AAED,UAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAC/B,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,kBAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,oBAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,oBAAU,QAAQ,GAAG,eAAe,oBAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,gBAAY,yBAAU;AAC5B,UAAM,gBAAY,yBAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,oBAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACp
D;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,8BAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAA
oB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
|
package/dist/tts.d.cts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { tokenize, tts } from '@livekit/agents';
|
|
1
|
+
import { type APIConnectOptions, tokenize, tts } from '@livekit/agents';
|
|
2
2
|
import type { TTSEncoding, TTSModels } from './models.js';
|
|
3
3
|
export interface TTSOptions {
|
|
4
4
|
model: TTSModels | string;
|
|
@@ -13,14 +13,15 @@ export declare class TTS extends tts.TTS {
|
|
|
13
13
|
private opts;
|
|
14
14
|
label: string;
|
|
15
15
|
constructor(opts?: Partial<TTSOptions>);
|
|
16
|
-
synthesize(text: string): tts.ChunkedStream;
|
|
16
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): tts.ChunkedStream;
|
|
17
17
|
stream(): tts.SynthesizeStream;
|
|
18
18
|
}
|
|
19
19
|
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
20
|
+
#private;
|
|
20
21
|
label: string;
|
|
21
22
|
private opts;
|
|
22
23
|
private text;
|
|
23
|
-
constructor(tts: TTS, text: string, opts: TTSOptions);
|
|
24
|
+
constructor(tts: TTS, text: string, opts: TTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
|
|
24
25
|
protected run(): Promise<void>;
|
|
25
26
|
}
|
|
26
27
|
export declare class SynthesizeStream extends tts.SynthesizeStream {
|
package/dist/tts.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { tokenize, tts } from '@livekit/agents';
|
|
1
|
+
import { type APIConnectOptions, tokenize, tts } from '@livekit/agents';
|
|
2
2
|
import type { TTSEncoding, TTSModels } from './models.js';
|
|
3
3
|
export interface TTSOptions {
|
|
4
4
|
model: TTSModels | string;
|
|
@@ -13,14 +13,15 @@ export declare class TTS extends tts.TTS {
|
|
|
13
13
|
private opts;
|
|
14
14
|
label: string;
|
|
15
15
|
constructor(opts?: Partial<TTSOptions>);
|
|
16
|
-
synthesize(text: string): tts.ChunkedStream;
|
|
16
|
+
synthesize(text: string, connOptions?: APIConnectOptions, abortSignal?: AbortSignal): tts.ChunkedStream;
|
|
17
17
|
stream(): tts.SynthesizeStream;
|
|
18
18
|
}
|
|
19
19
|
export declare class ChunkedStream extends tts.ChunkedStream {
|
|
20
|
+
#private;
|
|
20
21
|
label: string;
|
|
21
22
|
private opts;
|
|
22
23
|
private text;
|
|
23
|
-
constructor(tts: TTS, text: string, opts: TTSOptions);
|
|
24
|
+
constructor(tts: TTS, text: string, opts: TTSOptions, connOptions?: APIConnectOptions, abortSignal?: AbortSignal);
|
|
24
25
|
protected run(): Promise<void>;
|
|
25
26
|
}
|
|
26
27
|
export declare class SynthesizeStream extends tts.SynthesizeStream {
|
package/dist/tts.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../src/tts.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,KAAK,iBAAiB,EAItB,QAAQ,EACR,GAAG,EACJ,MAAM,iBAAiB,CAAC;AAIzB,OAAO,KAAK,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAM1D,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,SAAS,GAAG,MAAM,CAAC;IAC1B,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iBAAiB,EAAE,QAAQ,CAAC,iBAAiB,CAAC;IAC9C,YAAY,EAAE,GAAG,CAAC,eAAe,CAAC;CACnC;AAgBD,qBAAa,GAAI,SAAQ,GAAG,CAAC,GAAG;IAC9B,OAAO,CAAC,IAAI,CAAa;IACzB,KAAK,SAAkB;gBAEX,IAAI,GAAE,OAAO,CAAC,UAAU,CAAM;IAiB1C,UAAU,CACR,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW,GACxB,GAAG,CAAC,aAAa;IAIpB,MAAM,IAAI,GAAG,CAAC,gBAAgB;CAG/B;AAED,qBAAa,aAAc,SAAQ,GAAG,CAAC,aAAa;;IAClD,KAAK,SAA4B;IAEjC,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,IAAI,CAAS;gBAGnB,GAAG,EAAE,GAAG,EACR,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,UAAU,EAChB,WAAW,CAAC,EAAE,iBAAiB,EAC/B,WAAW,CAAC,EAAE,WAAW;cAOX,GAAG;CA6EpB;AAED,qBAAa,gBAAiB,SAAQ,GAAG,CAAC,gBAAgB;IACxD,OAAO,CAAC,IAAI,CAAa;IACzB,OAAO,CAAC,SAAS,CAA0B;IAC3C,KAAK,SAA+B;IAEpC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAqC;IACtE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAqC;gBAE1D,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,UAAU;YAMxB,cAAc;cA4CZ,GAAG;CA4IpB"}
|
package/dist/tts.js
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
AudioByteStream,
|
|
3
|
+
log,
|
|
4
|
+
shortuuid,
|
|
5
|
+
tokenize,
|
|
6
|
+
tts
|
|
7
|
+
} from "@livekit/agents";
|
|
2
8
|
import { request } from "node:https";
|
|
3
9
|
import { WebSocket } from "ws";
|
|
4
10
|
const AUTHORIZATION_HEADER = "Authorization";
|
|
@@ -35,8 +41,8 @@ class TTS extends tts.TTS {
|
|
|
35
41
|
);
|
|
36
42
|
}
|
|
37
43
|
}
|
|
38
|
-
synthesize(text) {
|
|
39
|
-
return new ChunkedStream(this, text, this.opts);
|
|
44
|
+
synthesize(text, connOptions, abortSignal) {
|
|
45
|
+
return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
|
|
40
46
|
}
|
|
41
47
|
stream() {
|
|
42
48
|
return new SynthesizeStream(this, this.opts);
|
|
@@ -44,10 +50,11 @@ class TTS extends tts.TTS {
|
|
|
44
50
|
}
|
|
45
51
|
class ChunkedStream extends tts.ChunkedStream {
|
|
46
52
|
label = "deepgram.ChunkedStream";
|
|
53
|
+
#logger = log();
|
|
47
54
|
opts;
|
|
48
55
|
text;
|
|
49
|
-
constructor(tts2, text, opts) {
|
|
50
|
-
super(text, tts2);
|
|
56
|
+
constructor(tts2, text, opts, connOptions, abortSignal) {
|
|
57
|
+
super(text, tts2, connOptions, abortSignal);
|
|
51
58
|
this.text = text;
|
|
52
59
|
this.opts = opts;
|
|
53
60
|
}
|
|
@@ -69,7 +76,8 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
69
76
|
headers: {
|
|
70
77
|
[AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey}`,
|
|
71
78
|
"Content-Type": "application/json"
|
|
72
|
-
}
|
|
79
|
+
},
|
|
80
|
+
signal: this.abortSignal
|
|
73
81
|
},
|
|
74
82
|
(res) => {
|
|
75
83
|
if (res.statusCode !== 200) {
|
|
@@ -91,7 +99,8 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
91
99
|
}
|
|
92
100
|
});
|
|
93
101
|
res.on("error", (err) => {
|
|
94
|
-
|
|
102
|
+
if (err.message === "aborted") return;
|
|
103
|
+
this.#logger.error({ err }, "Deepgram TTS response error");
|
|
95
104
|
});
|
|
96
105
|
res.on("close", () => {
|
|
97
106
|
for (const frame of bstream.flush()) {
|
|
@@ -112,8 +121,10 @@ class ChunkedStream extends tts.ChunkedStream {
|
|
|
112
121
|
}
|
|
113
122
|
);
|
|
114
123
|
req.on("error", (err) => {
|
|
115
|
-
|
|
124
|
+
if (err.name === "AbortError") return;
|
|
125
|
+
this.#logger.error({ err }, "Deepgram TTS request error");
|
|
116
126
|
});
|
|
127
|
+
req.on("close", () => resolve());
|
|
117
128
|
req.write(JSON.stringify(json));
|
|
118
129
|
req.end();
|
|
119
130
|
});
|
package/dist/tts.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioByteStream, shortuuid, tokenize, tts } from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? 
defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(text: string): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n private opts: TTSOptions;\n private text: string;\n\n constructor(tts: TTS, text: string, opts: TTSOptions) {\n super(text, tts);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n reject(err);\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n 
final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n reject(err);\n });\n\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = 
this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame && !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if 
(message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n 
}\n}\n"],"mappings":"AAGA,SAAS,iBAAiB,WAAW,UAAU,WAAW;AAE1D,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,SAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAzC9C;AA0CI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WAAW,MAAiC;AAC1C,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,IAAI;AAAA,EAChD;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAYA,MAAU,MAAc,MAAkB;AACpD,UAAM,MAAMA,IAAG;AACf,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,MAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,QACF;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,mBAAO,GAAG;AAAA,UACZ,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;A
ACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,eAAO,GAAG;AAAA,MACZ,CAAC;AAED,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAAA,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS
,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AACpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","nam
es":["tts","segmentId"]}
|
|
1
|
+
{"version":3,"sources":["../src/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport {\n type APIConnectOptions,\n AudioByteStream,\n log,\n shortuuid,\n tokenize,\n tts,\n} from '@livekit/agents';\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { request } from 'node:https';\nimport { type RawData, WebSocket } from 'ws';\nimport type { TTSEncoding, TTSModels } from './models.js';\n\nconst AUTHORIZATION_HEADER = 'Authorization';\nconst NUM_CHANNELS = 1;\nconst MIN_SENTENCE_LENGTH = 8;\n\nexport interface TTSOptions {\n model: TTSModels | string;\n encoding: TTSEncoding;\n sampleRate: number;\n apiKey?: string;\n baseUrl?: string;\n sentenceTokenizer: tokenize.SentenceTokenizer;\n capabilities: tts.TTSCapabilities;\n}\n\nconst defaultTTSOptions: TTSOptions = {\n model: 'aura-asteria-en',\n encoding: 'linear16',\n sampleRate: 24000,\n apiKey: process.env.DEEPGRAM_API_KEY,\n baseUrl: 'https://api.deepgram.com',\n capabilities: {\n streaming: true,\n },\n sentenceTokenizer: new tokenize.basic.SentenceTokenizer({\n minSentenceLength: MIN_SENTENCE_LENGTH,\n }),\n};\n\nexport class TTS extends tts.TTS {\n private opts: TTSOptions;\n label = 'deepgram.TTS';\n\n constructor(opts: Partial<TTSOptions> = {}) {\n super(opts.sampleRate || defaultTTSOptions.sampleRate, NUM_CHANNELS, {\n streaming: opts.capabilities?.streaming ?? 
defaultTTSOptions.capabilities.streaming,\n });\n\n this.opts = {\n ...defaultTTSOptions,\n ...opts,\n };\n\n if (this.opts.apiKey === undefined) {\n throw new Error(\n 'Deepgram API key is required, whether as an argument or as $DEEPGRAM_API_KEY',\n );\n }\n }\n\n synthesize(\n text: string,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ): tts.ChunkedStream {\n return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);\n }\n\n stream(): tts.SynthesizeStream {\n return new SynthesizeStream(this, this.opts);\n }\n}\n\nexport class ChunkedStream extends tts.ChunkedStream {\n label = 'deepgram.ChunkedStream';\n #logger = log();\n private opts: TTSOptions;\n private text: string;\n\n constructor(\n tts: TTS,\n text: string,\n opts: TTSOptions,\n connOptions?: APIConnectOptions,\n abortSignal?: AbortSignal,\n ) {\n super(text, tts, connOptions, abortSignal);\n this.text = text;\n this.opts = opts;\n }\n\n protected async run() {\n const requestId = shortuuid();\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n const json = { text: this.text };\n const url = new URL(`${this.opts.baseUrl!}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n await new Promise<void>((resolve, reject) => {\n const req = request(\n {\n hostname: url.hostname,\n port: 443,\n path: url.pathname + url.search,\n method: 'POST',\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n 'Content-Type': 'application/json',\n },\n signal: this.abortSignal,\n },\n (res) => {\n if (res.statusCode !== 200) {\n reject(\n new Error(`Deepgram TTS HTTP request failed: ${res.statusCode} ${res.statusMessage}`),\n );\n return;\n }\n\n res.on('data', (chunk) => {\n for (const frame of bstream.write(chunk)) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: 
false,\n segmentId: requestId,\n });\n }\n }\n });\n\n res.on('error', (err) => {\n if (err.message === 'aborted') return;\n this.#logger.error({ err }, 'Deepgram TTS response error');\n });\n\n res.on('close', () => {\n for (const frame of bstream.flush()) {\n if (!this.queue.closed) {\n this.queue.put({\n requestId,\n frame,\n final: false,\n segmentId: requestId,\n });\n }\n }\n if (!this.queue.closed) {\n this.queue.close();\n }\n resolve();\n });\n },\n );\n\n req.on('error', (err) => {\n if (err.name === 'AbortError') return;\n this.#logger.error({ err }, 'Deepgram TTS request error');\n });\n\n req.on('close', () => resolve());\n req.write(JSON.stringify(json));\n req.end();\n });\n }\n}\n\nexport class SynthesizeStream extends tts.SynthesizeStream {\n private opts: TTSOptions;\n private tokenizer: tokenize.SentenceStream;\n label = 'deepgram.SynthesizeStream';\n\n private static readonly FLUSH_MSG = JSON.stringify({ type: 'Flush' });\n private static readonly CLOSE_MSG = JSON.stringify({ type: 'Close' });\n\n constructor(tts: TTS, opts: TTSOptions) {\n super(tts);\n this.opts = opts;\n this.tokenizer = opts.sentenceTokenizer.stream();\n }\n\n private async closeWebSocket(ws: WebSocket): Promise<void> {\n try {\n // Send Flush and Close messages to ensure Deepgram processes all remaining audio\n // and properly terminates the session, preventing lingering TTS sessions\n if (ws.readyState === WebSocket.OPEN) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n ws.send(SynthesizeStream.CLOSE_MSG);\n\n // Wait for server acknowledgment to prevent race conditions and ensure\n // proper cleanup, avoiding 429 Too Many Requests errors from lingering sessions\n try {\n await new Promise<void>((resolve, _reject) => {\n const timeout = setTimeout(() => {\n resolve();\n }, 1000);\n\n ws.once('message', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('close', () => {\n clearTimeout(timeout);\n resolve();\n });\n\n ws.once('error', () => {\n 
clearTimeout(timeout);\n resolve();\n });\n });\n } catch (e) {\n // Ignore timeout or other errors during close sequence\n }\n }\n } catch (e) {\n console.warn(`Error during WebSocket close sequence: ${e}`);\n } finally {\n if (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING) {\n ws.close();\n }\n }\n }\n\n protected async run() {\n const requestId = shortuuid();\n const segmentId = shortuuid();\n\n const wsUrl = this.opts.baseUrl!.replace(/^http/, 'ws');\n const url = new URL(`${wsUrl}/v1/speak`);\n url.searchParams.append('sample_rate', this.opts.sampleRate.toString());\n url.searchParams.append('model', this.opts.model);\n url.searchParams.append('encoding', this.opts.encoding);\n\n const ws = new WebSocket(url, {\n headers: {\n [AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,\n },\n });\n\n await new Promise((resolve, reject) => {\n ws.on('open', resolve);\n ws.on('error', (error) => reject(error));\n ws.on('close', (code) => reject(`WebSocket returned ${code}`));\n });\n\n const inputTask = async () => {\n for await (const data of this.input) {\n if (data === SynthesizeStream.FLUSH_SENTINEL) {\n this.tokenizer.flush();\n continue;\n }\n this.tokenizer.pushText(data);\n }\n this.tokenizer.endInput();\n this.tokenizer.close();\n };\n\n const sendTask = async () => {\n for await (const event of this.tokenizer) {\n if (this.abortController.signal.aborted) break;\n\n let text = event.token;\n if (!text.endsWith(' ')) {\n text += ' ';\n }\n\n const message = JSON.stringify({\n type: 'Speak',\n text: text,\n });\n\n ws.send(message);\n }\n\n if (!this.abortController.signal.aborted) {\n ws.send(SynthesizeStream.FLUSH_MSG);\n }\n };\n\n const recvTask = async () => {\n const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);\n let finalReceived = false;\n let timeout: NodeJS.Timeout | null = null;\n let lastFrame: AudioFrame | undefined;\n\n const sendLastFrame = (segmentId: string, final: boolean) => {\n if (lastFrame 
&& !this.queue.closed) {\n this.queue.put({ requestId, segmentId, frame: lastFrame, final });\n lastFrame = undefined;\n }\n };\n\n const clearMessageTimeout = () => {\n if (timeout) {\n clearTimeout(timeout);\n timeout = null;\n }\n };\n\n return new Promise<void>((resolve, reject) => {\n ws.on('message', (data: RawData, isBinary: boolean) => {\n clearMessageTimeout();\n\n if (!isBinary) {\n const message = JSON.parse(data.toString());\n if (message.type === 'Flushed') {\n finalReceived = true;\n clearMessageTimeout();\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n resolve();\n }\n\n return;\n }\n\n const buffer =\n data instanceof Buffer\n ? data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength)\n : (data as ArrayBuffer);\n for (const frame of bstream.write(buffer as ArrayBuffer)) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n });\n\n ws.on('close', (_code, _reason) => {\n if (!finalReceived) {\n for (const frame of bstream.flush()) {\n sendLastFrame(segmentId, false);\n lastFrame = frame;\n }\n sendLastFrame(segmentId, true);\n\n if (!this.queue.closed) {\n this.queue.put(SynthesizeStream.END_OF_STREAM);\n }\n }\n resolve();\n });\n\n ws.on('error', (error) => {\n clearMessageTimeout();\n reject(error);\n });\n });\n };\n\n try {\n await Promise.all([inputTask(), sendTask(), recvTask()]);\n } catch (e) {\n throw new Error(`failed in main task: ${e}`);\n } finally {\n await this.closeWebSocket(ws);\n }\n 
}\n}\n"],"mappings":"AAGA;AAAA,EAEE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,SAAS,eAAe;AACxB,SAAuB,iBAAiB;AAGxC,MAAM,uBAAuB;AAC7B,MAAM,eAAe;AACrB,MAAM,sBAAsB;AAY5B,MAAM,oBAAgC;AAAA,EACpC,OAAO;AAAA,EACP,UAAU;AAAA,EACV,YAAY;AAAA,EACZ,QAAQ,QAAQ,IAAI;AAAA,EACpB,SAAS;AAAA,EACT,cAAc;AAAA,IACZ,WAAW;AAAA,EACb;AAAA,EACA,mBAAmB,IAAI,SAAS,MAAM,kBAAkB;AAAA,IACtD,mBAAmB;AAAA,EACrB,CAAC;AACH;AAEO,MAAM,YAAY,IAAI,IAAI;AAAA,EACvB;AAAA,EACR,QAAQ;AAAA,EAER,YAAY,OAA4B,CAAC,GAAG;AAhD9C;AAiDI,UAAM,KAAK,cAAc,kBAAkB,YAAY,cAAc;AAAA,MACnE,aAAW,UAAK,iBAAL,mBAAmB,cAAa,kBAAkB,aAAa;AAAA,IAC5E,CAAC;AAED,SAAK,OAAO;AAAA,MACV,GAAG;AAAA,MACH,GAAG;AAAA,IACL;AAEA,QAAI,KAAK,KAAK,WAAW,QAAW;AAClC,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,WACE,MACA,aACA,aACmB;AACnB,WAAO,IAAI,cAAc,MAAM,MAAM,KAAK,MAAM,aAAa,WAAW;AAAA,EAC1E;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,iBAAiB,MAAM,KAAK,IAAI;AAAA,EAC7C;AACF;AAEO,MAAM,sBAAsB,IAAI,cAAc;AAAA,EACnD,QAAQ;AAAA,EACR,UAAU,IAAI;AAAA,EACN;AAAA,EACA;AAAA,EAER,YACEA,MACA,MACA,MACA,aACA,aACA;AACA,UAAM,MAAMA,MAAK,aAAa,WAAW;AACzC,SAAK,OAAO;AACZ,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAM,OAAO,EAAE,MAAM,KAAK,KAAK;AAC/B,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,KAAK,OAAQ,WAAW;AACpD,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,IAAI,QAAc,CAAC,SAAS,WAAW;AAC3C,YAAM,MAAM;AAAA,QACV;AAAA,UACE,UAAU,IAAI;AAAA,UACd,MAAM;AAAA,UACN,MAAM,IAAI,WAAW,IAAI;AAAA,UACzB,QAAQ;AAAA,UACR,SAAS;AAAA,YACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,YAClD,gBAAgB;AAAA,UAClB;AAAA,UACA,QAAQ,KAAK;AAAA,QACf;AAAA,QACA,CAAC,QAAQ;AACP,cAAI,IAAI,eAAe,KAAK;AAC1B;AAAA,cACE,IAAI,MAAM,qCAAqC,IAAI,UAAU,IAAI,IAAI,aAAa,EAAE;AAAA,YACtF;AACA;AAAA,UACF;AAEA,cAAI,GAAG,QAAQ,CAAC,UAAU;AACxB,uBAAW,SAAS,QAAQ,MAAM,KAAK,GAAG;AACxC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AA
AA,YACF;AAAA,UACF,CAAC;AAED,cAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,gBAAI,IAAI,YAAY,UAAW;AAC/B,iBAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,6BAA6B;AAAA,UAC3D,CAAC;AAED,cAAI,GAAG,SAAS,MAAM;AACpB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI;AAAA,kBACb;AAAA,kBACA;AAAA,kBACA,OAAO;AAAA,kBACP,WAAW;AAAA,gBACb,CAAC;AAAA,cACH;AAAA,YACF;AACA,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,MAAM;AAAA,YACnB;AACA,oBAAQ;AAAA,UACV,CAAC;AAAA,QACH;AAAA,MACF;AAEA,UAAI,GAAG,SAAS,CAAC,QAAQ;AACvB,YAAI,IAAI,SAAS,aAAc;AAC/B,aAAK,QAAQ,MAAM,EAAE,IAAI,GAAG,4BAA4B;AAAA,MAC1D,CAAC;AAED,UAAI,GAAG,SAAS,MAAM,QAAQ,CAAC;AAC/B,UAAI,MAAM,KAAK,UAAU,IAAI,CAAC;AAC9B,UAAI,IAAI;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEO,MAAM,yBAAyB,IAAI,iBAAiB;AAAA,EACjD;AAAA,EACA;AAAA,EACR,QAAQ;AAAA,EAER,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EACpE,OAAwB,YAAY,KAAK,UAAU,EAAE,MAAM,QAAQ,CAAC;AAAA,EAEpE,YAAYA,MAAU,MAAkB;AACtC,UAAMA,IAAG;AACT,SAAK,OAAO;AACZ,SAAK,YAAY,KAAK,kBAAkB,OAAO;AAAA,EACjD;AAAA,EAEA,MAAc,eAAe,IAA8B;AACzD,QAAI;AAGF,UAAI,GAAG,eAAe,UAAU,MAAM;AACpC,WAAG,KAAK,iBAAiB,SAAS;AAClC,WAAG,KAAK,iBAAiB,SAAS;AAIlC,YAAI;AACF,gBAAM,IAAI,QAAc,CAAC,SAAS,YAAY;AAC5C,kBAAM,UAAU,WAAW,MAAM;AAC/B,sBAAQ;AAAA,YACV,GAAG,GAAI;AAEP,eAAG,KAAK,WAAW,MAAM;AACvB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAED,eAAG,KAAK,SAAS,MAAM;AACrB,2BAAa,OAAO;AACpB,sBAAQ;AAAA,YACV,CAAC;AAAA,UACH,CAAC;AAAA,QACH,SAAS,GAAG;AAAA,QAEZ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,cAAQ,KAAK,0CAA0C,CAAC,EAAE;AAAA,IAC5D,UAAE;AACA,UAAI,GAAG,eAAe,UAAU,QAAQ,GAAG,eAAe,UAAU,YAAY;AAC9E,WAAG,MAAM;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,YAAY,UAAU;AAC5B,UAAM,YAAY,UAAU;AAE5B,UAAM,QAAQ,KAAK,KAAK,QAAS,QAAQ,SAAS,IAAI;AACtD,UAAM,MAAM,IAAI,IAAI,GAAG,KAAK,WAAW;AACvC,QAAI,aAAa,OAAO,eAAe,KAAK,KAAK,WAAW,SAAS,CAAC;AACtE,QAAI,aAAa,OAAO,SAAS,KAAK,KAAK,KAAK;AAChD,QAAI,aAAa,OAAO,YAAY,KAAK,KAAK,QAAQ;AAEtD,UAAM,KAAK,IAAI,UAAU,KAAK;AAAA,MAC5B,SAAS;AAAA,QACP,CAAC,oBAAoB,GAAG,SAAS,KAAK,KAAK,MAAO;AAAA,MACpD;AAA
A,IACF,CAAC;AAED,UAAM,IAAI,QAAQ,CAAC,SAAS,WAAW;AACrC,SAAG,GAAG,QAAQ,OAAO;AACrB,SAAG,GAAG,SAAS,CAAC,UAAU,OAAO,KAAK,CAAC;AACvC,SAAG,GAAG,SAAS,CAAC,SAAS,OAAO,sBAAsB,IAAI,EAAE,CAAC;AAAA,IAC/D,CAAC;AAED,UAAM,YAAY,YAAY;AAC5B,uBAAiB,QAAQ,KAAK,OAAO;AACnC,YAAI,SAAS,iBAAiB,gBAAgB;AAC5C,eAAK,UAAU,MAAM;AACrB;AAAA,QACF;AACA,aAAK,UAAU,SAAS,IAAI;AAAA,MAC9B;AACA,WAAK,UAAU,SAAS;AACxB,WAAK,UAAU,MAAM;AAAA,IACvB;AAEA,UAAM,WAAW,YAAY;AAC3B,uBAAiB,SAAS,KAAK,WAAW;AACxC,YAAI,KAAK,gBAAgB,OAAO,QAAS;AAEzC,YAAI,OAAO,MAAM;AACjB,YAAI,CAAC,KAAK,SAAS,GAAG,GAAG;AACvB,kBAAQ;AAAA,QACV;AAEA,cAAM,UAAU,KAAK,UAAU;AAAA,UAC7B,MAAM;AAAA,UACN;AAAA,QACF,CAAC;AAED,WAAG,KAAK,OAAO;AAAA,MACjB;AAEA,UAAI,CAAC,KAAK,gBAAgB,OAAO,SAAS;AACxC,WAAG,KAAK,iBAAiB,SAAS;AAAA,MACpC;AAAA,IACF;AAEA,UAAM,WAAW,YAAY;AAC3B,YAAM,UAAU,IAAI,gBAAgB,KAAK,KAAK,YAAY,YAAY;AACtE,UAAI,gBAAgB;AACpB,UAAI,UAAiC;AACrC,UAAI;AAEJ,YAAM,gBAAgB,CAACC,YAAmB,UAAmB;AAC3D,YAAI,aAAa,CAAC,KAAK,MAAM,QAAQ;AACnC,eAAK,MAAM,IAAI,EAAE,WAAW,WAAAA,YAAW,OAAO,WAAW,MAAM,CAAC;AAChE,sBAAY;AAAA,QACd;AAAA,MACF;AAEA,YAAM,sBAAsB,MAAM;AAChC,YAAI,SAAS;AACX,uBAAa,OAAO;AACpB,oBAAU;AAAA,QACZ;AAAA,MACF;AAEA,aAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,WAAG,GAAG,WAAW,CAAC,MAAe,aAAsB;AACrD,8BAAoB;AAEpB,cAAI,CAAC,UAAU;AACb,kBAAM,UAAU,KAAK,MAAM,KAAK,SAAS,CAAC;AAC1C,gBAAI,QAAQ,SAAS,WAAW;AAC9B,8BAAgB;AAChB,kCAAoB;AACpB,yBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,8BAAc,WAAW,KAAK;AAC9B,4BAAY;AAAA,cACd;AACA,4BAAc,WAAW,IAAI;AAE7B,kBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,qBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,cAC/C;AACA,sBAAQ;AAAA,YACV;AAEA;AAAA,UACF;AAEA,gBAAM,SACJ,gBAAgB,SACZ,KAAK,OAAO,MAAM,KAAK,YAAY,KAAK,aAAa,KAAK,UAAU,IACnE;AACP,qBAAW,SAAS,QAAQ,MAAM,MAAqB,GAAG;AACxD,0BAAc,WAAW,KAAK;AAC9B,wBAAY;AAAA,UACd;AAAA,QACF,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,OAAO,YAAY;AACjC,cAAI,CAAC,eAAe;AAClB,uBAAW,SAAS,QAAQ,MAAM,GAAG;AACnC,4BAAc,WAAW,KAAK;AAC9B,0BAAY;AAAA,YACd;AACA,0BAAc,WAAW,IAAI;AAE7B,gBAAI,CAAC,KAAK,MAAM,QAAQ;AACtB,mBAAK,MAAM,IAAI,iBAAiB,aAAa;AAAA,YAC/C;AAAA,UACF;AACA,kBAAQ;AAAA,QACV,CAAC;AAED,WAAG,GAAG,SAAS,CAAC,UAAU;AACxB,8BAAoB;AA
CpB,iBAAO,KAAK;AAAA,QACd,CAAC;AAAA,MACH,CAAC;AAAA,IACH;AAEA,QAAI;AACF,YAAM,QAAQ,IAAI,CAAC,UAAU,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC;AAAA,IACzD,SAAS,GAAG;AACV,YAAM,IAAI,MAAM,wBAAwB,CAAC,EAAE;AAAA,IAC7C,UAAE;AACA,YAAM,KAAK,eAAe,EAAE;AAAA,IAC9B;AAAA,EACF;AACF;","names":["tts","segmentId"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents-plugin-deepgram",
|
|
3
|
-
"version": "1.0.24",
|
|
3
|
+
"version": "1.0.27",
|
|
4
4
|
"description": "Deepgram plugin for LiveKit Agents for Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -30,16 +30,16 @@
|
|
|
30
30
|
"@types/ws": "^8.5.10",
|
|
31
31
|
"tsup": "^8.3.5",
|
|
32
32
|
"typescript": "^5.0.0",
|
|
33
|
-
"@livekit/agents": "1.0.24",
|
|
34
|
-
"@livekit/agents-plugin-silero": "1.0.24",
|
|
35
|
-
"@livekit/agents-plugins-test": "1.0.24"
|
|
33
|
+
"@livekit/agents": "1.0.27",
|
|
34
|
+
"@livekit/agents-plugin-silero": "1.0.27",
|
|
35
|
+
"@livekit/agents-plugins-test": "1.0.27"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"ws": "^8.16.0"
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
41
|
"@livekit/rtc-node": "^0.13.12",
|
|
42
|
-
"@livekit/agents": "1.0.24"
|
|
42
|
+
"@livekit/agents": "1.0.27"
|
|
43
43
|
},
|
|
44
44
|
"scripts": {
|
|
45
45
|
"build": "tsup --onSuccess \"pnpm build:types\"",
|
package/src/stt.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import {
|
|
5
|
+
type APIConnectOptions,
|
|
5
6
|
type AudioBuffer,
|
|
6
7
|
AudioByteStream,
|
|
7
8
|
AudioEnergyFilter,
|
|
@@ -37,6 +38,7 @@ export interface STTOptions {
|
|
|
37
38
|
dictation: boolean;
|
|
38
39
|
diarize: boolean;
|
|
39
40
|
numerals: boolean;
|
|
41
|
+
mipOptOut: boolean;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
const defaultSTTOptions: STTOptions = {
|
|
@@ -58,6 +60,7 @@ const defaultSTTOptions: STTOptions = {
|
|
|
58
60
|
dictation: false,
|
|
59
61
|
diarize: false,
|
|
60
62
|
numerals: false,
|
|
63
|
+
mipOptOut: false,
|
|
61
64
|
};
|
|
62
65
|
|
|
63
66
|
export class STT extends stt.STT {
|
|
@@ -113,8 +116,8 @@ export class STT extends stt.STT {
|
|
|
113
116
|
this.#opts = { ...this.#opts, ...opts };
|
|
114
117
|
}
|
|
115
118
|
|
|
116
|
-
stream(): SpeechStream {
|
|
117
|
-
return new SpeechStream(this, this.#opts, this.abortController);
|
|
119
|
+
stream(options?: { connOptions?: APIConnectOptions }): SpeechStream {
|
|
120
|
+
return new SpeechStream(this, this.#opts, options?.connOptions);
|
|
118
121
|
}
|
|
119
122
|
|
|
120
123
|
async close() {
|
|
@@ -132,12 +135,8 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
132
135
|
#audioDurationCollector: PeriodicCollector<number>;
|
|
133
136
|
label = 'deepgram.SpeechStream';
|
|
134
137
|
|
|
135
|
-
constructor(
|
|
136
|
-
stt: STT,
|
|
137
|
-
opts: STTOptions,
|
|
138
|
-
private abortController: AbortController,
|
|
139
|
-
) {
|
|
140
|
-
super(stt, opts.sampleRate);
|
|
138
|
+
constructor(stt: STT, opts: STTOptions, connOptions?: APIConnectOptions) {
|
|
139
|
+
super(stt, opts.sampleRate, connOptions);
|
|
141
140
|
this.#opts = opts;
|
|
142
141
|
this.closed = false;
|
|
143
142
|
this.#audioEnergyFilter = new AudioEnergyFilter();
|
|
@@ -173,6 +172,7 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
173
172
|
keyterm: this.#opts.keyterm,
|
|
174
173
|
profanity_filter: this.#opts.profanityFilter,
|
|
175
174
|
language: this.#opts.language,
|
|
175
|
+
mip_opt_out: this.#opts.mipOptOut,
|
|
176
176
|
};
|
|
177
177
|
Object.entries(params).forEach(([k, v]) => {
|
|
178
178
|
if (v !== undefined) {
|
|
@@ -260,12 +260,13 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
260
260
|
samples100Ms,
|
|
261
261
|
);
|
|
262
262
|
|
|
263
|
+
// waitForAbort internally sets up an abort listener on the abort signal
|
|
264
|
+
// we need to put it outside loop to avoid constant re-registration of the listener
|
|
265
|
+
const abortPromise = waitForAbort(this.abortSignal);
|
|
266
|
+
|
|
263
267
|
try {
|
|
264
268
|
while (!this.closed) {
|
|
265
|
-
const result = await Promise.race([
|
|
266
|
-
this.input.next(),
|
|
267
|
-
waitForAbort(this.abortController.signal),
|
|
268
|
-
]);
|
|
269
|
+
const result = await Promise.race([this.input.next(), abortPromise]);
|
|
269
270
|
|
|
270
271
|
if (result === undefined) return; // aborted
|
|
271
272
|
if (result.done) {
|
|
@@ -303,6 +304,16 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
303
304
|
};
|
|
304
305
|
|
|
305
306
|
const listenTask = Task.from(async (controller) => {
|
|
307
|
+
const putMessage = (message: stt.SpeechEvent) => {
|
|
308
|
+
if (!this.queue.closed) {
|
|
309
|
+
try {
|
|
310
|
+
this.queue.put(message);
|
|
311
|
+
} catch (e) {
|
|
312
|
+
// ignore
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
};
|
|
316
|
+
|
|
306
317
|
const listenMessage = new Promise<void>((resolve, reject) => {
|
|
307
318
|
ws.on('message', (msg) => {
|
|
308
319
|
try {
|
|
@@ -315,13 +326,7 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
315
326
|
// It's also possible we receive a transcript without a SpeechStarted event.
|
|
316
327
|
if (this.#speaking) return;
|
|
317
328
|
this.#speaking = true;
|
|
318
|
-
|
|
319
|
-
try {
|
|
320
|
-
this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
321
|
-
} catch (e) {
|
|
322
|
-
// ignore
|
|
323
|
-
}
|
|
324
|
-
}
|
|
329
|
+
putMessage({ type: stt.SpeechEventType.START_OF_SPEECH });
|
|
325
330
|
break;
|
|
326
331
|
}
|
|
327
332
|
// see this page:
|
|
@@ -342,18 +347,18 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
342
347
|
if (alternatives[0] && alternatives[0].text) {
|
|
343
348
|
if (!this.#speaking) {
|
|
344
349
|
this.#speaking = true;
|
|
345
|
-
|
|
350
|
+
putMessage({
|
|
346
351
|
type: stt.SpeechEventType.START_OF_SPEECH,
|
|
347
352
|
});
|
|
348
353
|
}
|
|
349
354
|
|
|
350
355
|
if (isFinal) {
|
|
351
|
-
|
|
356
|
+
putMessage({
|
|
352
357
|
type: stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
353
358
|
alternatives: [alternatives[0], ...alternatives.slice(1)],
|
|
354
359
|
});
|
|
355
360
|
} else {
|
|
356
|
-
|
|
361
|
+
putMessage({
|
|
357
362
|
type: stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
358
363
|
alternatives: [alternatives[0], ...alternatives.slice(1)],
|
|
359
364
|
});
|
|
@@ -365,7 +370,7 @@ export class SpeechStream extends stt.SpeechStream {
|
|
|
365
370
|
// a non-empty transcript (deepgram doesn't have a SpeechEnded event)
|
|
366
371
|
if (isEndpoint && this.#speaking) {
|
|
367
372
|
this.#speaking = false;
|
|
368
|
-
|
|
373
|
+
putMessage({ type: stt.SpeechEventType.END_OF_SPEECH });
|
|
369
374
|
}
|
|
370
375
|
|
|
371
376
|
break;
|
package/src/tts.ts
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import {
|
|
4
|
+
import {
|
|
5
|
+
type APIConnectOptions,
|
|
6
|
+
AudioByteStream,
|
|
7
|
+
log,
|
|
8
|
+
shortuuid,
|
|
9
|
+
tokenize,
|
|
10
|
+
tts,
|
|
11
|
+
} from '@livekit/agents';
|
|
5
12
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
6
13
|
import { request } from 'node:https';
|
|
7
14
|
import { type RawData, WebSocket } from 'ws';
|
|
@@ -56,8 +63,12 @@ export class TTS extends tts.TTS {
|
|
|
56
63
|
}
|
|
57
64
|
}
|
|
58
65
|
|
|
59
|
-
synthesize(
|
|
60
|
-
|
|
66
|
+
synthesize(
|
|
67
|
+
text: string,
|
|
68
|
+
connOptions?: APIConnectOptions,
|
|
69
|
+
abortSignal?: AbortSignal,
|
|
70
|
+
): tts.ChunkedStream {
|
|
71
|
+
return new ChunkedStream(this, text, this.opts, connOptions, abortSignal);
|
|
61
72
|
}
|
|
62
73
|
|
|
63
74
|
stream(): tts.SynthesizeStream {
|
|
@@ -67,11 +78,18 @@ export class TTS extends tts.TTS {
|
|
|
67
78
|
|
|
68
79
|
export class ChunkedStream extends tts.ChunkedStream {
|
|
69
80
|
label = 'deepgram.ChunkedStream';
|
|
81
|
+
#logger = log();
|
|
70
82
|
private opts: TTSOptions;
|
|
71
83
|
private text: string;
|
|
72
84
|
|
|
73
|
-
constructor(
|
|
74
|
-
|
|
85
|
+
constructor(
|
|
86
|
+
tts: TTS,
|
|
87
|
+
text: string,
|
|
88
|
+
opts: TTSOptions,
|
|
89
|
+
connOptions?: APIConnectOptions,
|
|
90
|
+
abortSignal?: AbortSignal,
|
|
91
|
+
) {
|
|
92
|
+
super(text, tts, connOptions, abortSignal);
|
|
75
93
|
this.text = text;
|
|
76
94
|
this.opts = opts;
|
|
77
95
|
}
|
|
@@ -96,6 +114,7 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
96
114
|
[AUTHORIZATION_HEADER]: `Token ${this.opts.apiKey!}`,
|
|
97
115
|
'Content-Type': 'application/json',
|
|
98
116
|
},
|
|
117
|
+
signal: this.abortSignal,
|
|
99
118
|
},
|
|
100
119
|
(res) => {
|
|
101
120
|
if (res.statusCode !== 200) {
|
|
@@ -119,7 +138,8 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
119
138
|
});
|
|
120
139
|
|
|
121
140
|
res.on('error', (err) => {
|
|
122
|
-
|
|
141
|
+
if (err.message === 'aborted') return;
|
|
142
|
+
this.#logger.error({ err }, 'Deepgram TTS response error');
|
|
123
143
|
});
|
|
124
144
|
|
|
125
145
|
res.on('close', () => {
|
|
@@ -142,9 +162,11 @@ export class ChunkedStream extends tts.ChunkedStream {
|
|
|
142
162
|
);
|
|
143
163
|
|
|
144
164
|
req.on('error', (err) => {
|
|
145
|
-
|
|
165
|
+
if (err.name === 'AbortError') return;
|
|
166
|
+
this.#logger.error({ err }, 'Deepgram TTS request error');
|
|
146
167
|
});
|
|
147
168
|
|
|
169
|
+
req.on('close', () => resolve());
|
|
148
170
|
req.write(JSON.stringify(json));
|
|
149
171
|
req.end();
|
|
150
172
|
});
|