@livekit/agents 1.0.19 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/stt.cjs +171 -125
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +4 -0
- package/dist/inference/stt.d.ts +4 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +171 -125
- package/dist/inference/stt.js.map +1 -1
- package/dist/stt/stt.cjs +3 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +1 -0
- package/dist/stt/stt.d.ts +1 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +3 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +13 -1
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +1 -0
- package/dist/tts/tts.d.ts +1 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +13 -1
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +15 -22
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +0 -1
- package/dist/utils.d.ts +0 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +15 -21
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +2 -1
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +3 -10
- package/dist/utils.test.js.map +1 -1
- package/dist/vad.cjs +3 -0
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +1 -0
- package/dist/vad.d.ts +1 -0
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +3 -0
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent_activity.cjs +3 -0
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +3 -0
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +1 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.js +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/stt.ts +201 -136
- package/src/stt/stt.ts +4 -0
- package/src/tts/tts.ts +14 -1
- package/src/utils.test.ts +3 -10
- package/src/utils.ts +21 -27
- package/src/vad.ts +4 -0
- package/src/voice/agent_activity.ts +3 -0
- package/src/voice/audio_recognition.ts +1 -1
package/dist/inference/stt.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import {} from "@livekit/rtc-node";
|
|
2
|
-
import {} from "ws";
|
|
3
2
|
import { APIError, APIStatusError } from "../_exceptions.js";
|
|
4
3
|
import { AudioByteStream } from "../audio.js";
|
|
5
4
|
import { log } from "../log.js";
|
|
5
|
+
import { createStreamChannel } from "../stream/stream_channel.js";
|
|
6
6
|
import {
|
|
7
7
|
STT as BaseSTT,
|
|
8
8
|
SpeechStream as BaseSpeechStream,
|
|
@@ -80,6 +80,32 @@ class STT extends BaseSTT {
|
|
|
80
80
|
this.streams.add(stream);
|
|
81
81
|
return stream;
|
|
82
82
|
}
|
|
83
|
+
async connectWs(timeout) {
|
|
84
|
+
const params = {
|
|
85
|
+
settings: {
|
|
86
|
+
sample_rate: String(this.opts.sampleRate),
|
|
87
|
+
encoding: this.opts.encoding,
|
|
88
|
+
extra: this.opts.modelOptions
|
|
89
|
+
}
|
|
90
|
+
};
|
|
91
|
+
if (this.opts.model && this.opts.model !== "auto") {
|
|
92
|
+
params.model = this.opts.model;
|
|
93
|
+
}
|
|
94
|
+
if (this.opts.language) {
|
|
95
|
+
params.settings.language = this.opts.language;
|
|
96
|
+
}
|
|
97
|
+
let baseURL = this.opts.baseURL;
|
|
98
|
+
if (baseURL.startsWith("http://") || baseURL.startsWith("https://")) {
|
|
99
|
+
baseURL = baseURL.replace("http", "ws");
|
|
100
|
+
}
|
|
101
|
+
const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);
|
|
102
|
+
const url = `${baseURL}/stt`;
|
|
103
|
+
const headers = { Authorization: `Bearer ${token}` };
|
|
104
|
+
const socket = await connectWs(url, headers, timeout);
|
|
105
|
+
const msg = { ...params, type: "session.create" };
|
|
106
|
+
socket.send(JSON.stringify(msg));
|
|
107
|
+
return socket;
|
|
108
|
+
}
|
|
83
109
|
}
|
|
84
110
|
class SpeechStream extends BaseSpeechStream {
|
|
85
111
|
opts;
|
|
@@ -87,158 +113,178 @@ class SpeechStream extends BaseSpeechStream {
|
|
|
87
113
|
speaking = false;
|
|
88
114
|
speechDuration = 0;
|
|
89
115
|
reconnectEvent = new Event();
|
|
116
|
+
stt;
|
|
117
|
+
connOptions;
|
|
90
118
|
#logger = log();
|
|
91
119
|
constructor(sttImpl, opts, connOptions) {
|
|
92
120
|
super(sttImpl, opts.sampleRate, connOptions);
|
|
93
121
|
this.opts = opts;
|
|
122
|
+
this.stt = sttImpl;
|
|
123
|
+
this.connOptions = connOptions;
|
|
94
124
|
}
|
|
95
125
|
get label() {
|
|
96
126
|
return "inference.SpeechStream";
|
|
97
127
|
}
|
|
98
128
|
updateOptions(opts) {
|
|
99
129
|
this.opts = { ...this.opts, ...opts };
|
|
130
|
+
this.reconnectEvent.set();
|
|
100
131
|
}
|
|
101
132
|
async run() {
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
const
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
133
|
+
while (true) {
|
|
134
|
+
let ws = null;
|
|
135
|
+
let closing = false;
|
|
136
|
+
let finalReceived = false;
|
|
137
|
+
const eventChannel = createStreamChannel();
|
|
138
|
+
const resourceCleanup = () => {
|
|
139
|
+
if (closing) return;
|
|
140
|
+
closing = true;
|
|
141
|
+
eventChannel.close();
|
|
142
|
+
ws == null ? void 0 : ws.removeAllListeners();
|
|
143
|
+
ws == null ? void 0 : ws.close();
|
|
112
144
|
};
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
}
|
|
119
|
-
let baseURL = this.opts.baseURL;
|
|
120
|
-
if (baseURL.startsWith("http://") || baseURL.startsWith("https://")) {
|
|
121
|
-
baseURL = baseURL.replace("http", "ws");
|
|
122
|
-
}
|
|
123
|
-
const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);
|
|
124
|
-
const url = `${baseURL}/stt`;
|
|
125
|
-
const headers = { Authorization: `Bearer ${token}` };
|
|
126
|
-
const socket = await connectWs(url, headers, 1e4);
|
|
127
|
-
const msg = { ...params, type: "session.create" };
|
|
128
|
-
socket.send(JSON.stringify(msg));
|
|
129
|
-
return socket;
|
|
130
|
-
};
|
|
131
|
-
const send = async (socket, signal) => {
|
|
132
|
-
const audioStream = new AudioByteStream(
|
|
133
|
-
this.opts.sampleRate,
|
|
134
|
-
1,
|
|
135
|
-
Math.floor(this.opts.sampleRate / 20)
|
|
136
|
-
// 50ms
|
|
137
|
-
);
|
|
138
|
-
for await (const ev of this.input) {
|
|
139
|
-
if (signal.aborted) break;
|
|
140
|
-
let frames;
|
|
141
|
-
if (ev === SpeechStream.FLUSH_SENTINEL) {
|
|
142
|
-
frames = audioStream.flush();
|
|
143
|
-
} else {
|
|
144
|
-
const frame = ev;
|
|
145
|
-
frames = audioStream.write(new Int16Array(frame.data).buffer);
|
|
146
|
-
}
|
|
147
|
-
for (const frame of frames) {
|
|
148
|
-
this.speechDuration += frame.samplesPerChannel / frame.sampleRate;
|
|
149
|
-
const base64 = Buffer.from(frame.data.buffer).toString("base64");
|
|
150
|
-
const msg = { type: "input_audio", audio: base64 };
|
|
151
|
-
socket.send(JSON.stringify(msg));
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
closingWs = true;
|
|
155
|
-
socket.send(JSON.stringify({ type: "session.finalize" }));
|
|
156
|
-
};
|
|
157
|
-
const recv = async (socket, signal) => {
|
|
158
|
-
while (!this.closed && !signal.aborted) {
|
|
159
|
-
const dataPromise = new Promise((resolve, reject) => {
|
|
160
|
-
const messageHandler = (d) => {
|
|
161
|
-
resolve(d.toString());
|
|
162
|
-
removeListeners();
|
|
145
|
+
const createWsListener = async (ws2, signal) => {
|
|
146
|
+
return new Promise((resolve, reject) => {
|
|
147
|
+
const onAbort = () => {
|
|
148
|
+
resourceCleanup();
|
|
149
|
+
reject(new Error("WebSocket connection aborted"));
|
|
163
150
|
};
|
|
164
|
-
|
|
151
|
+
signal.addEventListener("abort", onAbort, { once: true });
|
|
152
|
+
ws2.on("message", (data) => {
|
|
153
|
+
const json = JSON.parse(data.toString());
|
|
154
|
+
eventChannel.write(json);
|
|
155
|
+
});
|
|
156
|
+
ws2.on("error", (e) => {
|
|
157
|
+
this.#logger.error({ error: e }, "WebSocket error");
|
|
158
|
+
resourceCleanup();
|
|
165
159
|
reject(e);
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
if (
|
|
170
|
-
|
|
160
|
+
});
|
|
161
|
+
ws2.on("close", (code) => {
|
|
162
|
+
resourceCleanup();
|
|
163
|
+
if (!closing) return this.#logger.error("WebSocket closed unexpectedly");
|
|
164
|
+
if (finalReceived) return resolve();
|
|
165
|
+
reject(
|
|
166
|
+
new APIStatusError({
|
|
167
|
+
message: "LiveKit STT connection closed unexpectedly",
|
|
168
|
+
options: { statusCode: code }
|
|
169
|
+
})
|
|
170
|
+
);
|
|
171
|
+
});
|
|
172
|
+
});
|
|
173
|
+
};
|
|
174
|
+
const send = async (socket, signal) => {
|
|
175
|
+
const audioStream = new AudioByteStream(
|
|
176
|
+
this.opts.sampleRate,
|
|
177
|
+
1,
|
|
178
|
+
Math.floor(this.opts.sampleRate / 20)
|
|
179
|
+
// 50ms
|
|
180
|
+
);
|
|
181
|
+
const abortPromise = new Promise((_, reject) => {
|
|
182
|
+
if (signal.aborted) {
|
|
183
|
+
return reject(new Error("Send aborted"));
|
|
184
|
+
}
|
|
185
|
+
const onAbort = () => reject(new Error("Send aborted"));
|
|
186
|
+
signal.addEventListener("abort", onAbort, { once: true });
|
|
187
|
+
});
|
|
188
|
+
const iterator = this.input[Symbol.asyncIterator]();
|
|
189
|
+
try {
|
|
190
|
+
while (true) {
|
|
191
|
+
const result = await Promise.race([iterator.next(), abortPromise]);
|
|
192
|
+
if (result.done) break;
|
|
193
|
+
const ev = result.value;
|
|
194
|
+
let frames;
|
|
195
|
+
if (ev === SpeechStream.FLUSH_SENTINEL) {
|
|
196
|
+
frames = audioStream.flush();
|
|
171
197
|
} else {
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
message: "LiveKit STT connection closed unexpectedly",
|
|
175
|
-
options: { statusCode: code }
|
|
176
|
-
})
|
|
177
|
-
);
|
|
198
|
+
const frame = ev;
|
|
199
|
+
frames = audioStream.write(new Int16Array(frame.data).buffer);
|
|
178
200
|
}
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
socket.
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
const type = json.type;
|
|
194
|
-
switch (type) {
|
|
195
|
-
case "session.created":
|
|
196
|
-
case "session.finalized":
|
|
197
|
-
case "session.closed":
|
|
198
|
-
break;
|
|
199
|
-
case "interim_transcript":
|
|
200
|
-
this.processTranscript(json, false);
|
|
201
|
-
break;
|
|
202
|
-
case "final_transcript":
|
|
203
|
-
this.processTranscript(json, true);
|
|
204
|
-
break;
|
|
205
|
-
case "error":
|
|
206
|
-
this.#logger.error("received error from LiveKit STT: %o", json);
|
|
207
|
-
throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);
|
|
208
|
-
default:
|
|
209
|
-
this.#logger.warn("received unexpected message from LiveKit STT: %o", json);
|
|
210
|
-
break;
|
|
201
|
+
for (const frame of frames) {
|
|
202
|
+
this.speechDuration += frame.samplesPerChannel / frame.sampleRate;
|
|
203
|
+
const base64 = Buffer.from(frame.data.buffer).toString("base64");
|
|
204
|
+
const msg = { type: "input_audio", audio: base64 };
|
|
205
|
+
socket.send(JSON.stringify(msg));
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
closing = true;
|
|
209
|
+
socket.send(JSON.stringify({ type: "session.finalize" }));
|
|
210
|
+
} catch (e) {
|
|
211
|
+
if (e.message === "Send aborted") {
|
|
212
|
+
return;
|
|
213
|
+
}
|
|
214
|
+
throw e;
|
|
211
215
|
}
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
|
|
216
|
+
};
|
|
217
|
+
const recv = async (signal) => {
|
|
218
|
+
const serverEventStream = eventChannel.stream();
|
|
219
|
+
const reader = serverEventStream.getReader();
|
|
220
|
+
try {
|
|
221
|
+
while (!this.closed && !signal.aborted) {
|
|
222
|
+
const result = await reader.read();
|
|
223
|
+
if (signal.aborted) return;
|
|
224
|
+
if (result.done) return;
|
|
225
|
+
const json = result.value;
|
|
226
|
+
const type = json.type;
|
|
227
|
+
switch (type) {
|
|
228
|
+
case "session.created":
|
|
229
|
+
case "session.finalized":
|
|
230
|
+
break;
|
|
231
|
+
case "session.closed":
|
|
232
|
+
finalReceived = true;
|
|
233
|
+
resourceCleanup();
|
|
234
|
+
break;
|
|
235
|
+
case "interim_transcript":
|
|
236
|
+
this.processTranscript(json, false);
|
|
237
|
+
break;
|
|
238
|
+
case "final_transcript":
|
|
239
|
+
this.processTranscript(json, true);
|
|
240
|
+
break;
|
|
241
|
+
case "error":
|
|
242
|
+
this.#logger.error({ error: json }, "Received error from LiveKit STT");
|
|
243
|
+
resourceCleanup();
|
|
244
|
+
throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);
|
|
245
|
+
default:
|
|
246
|
+
this.#logger.warn(
|
|
247
|
+
{ message: json },
|
|
248
|
+
"Received unexpected message from LiveKit STT"
|
|
249
|
+
);
|
|
250
|
+
break;
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
} finally {
|
|
254
|
+
reader.releaseLock();
|
|
255
|
+
try {
|
|
256
|
+
await serverEventStream.cancel();
|
|
257
|
+
} catch (e) {
|
|
258
|
+
this.#logger.debug("Error cancelling serverEventStream (may already be cancelled):", e);
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
};
|
|
215
262
|
try {
|
|
216
|
-
ws = await
|
|
217
|
-
const
|
|
218
|
-
|
|
219
|
-
});
|
|
220
|
-
const recvTask = Task.from(
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
await Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]);
|
|
226
|
-
});
|
|
263
|
+
ws = await this.stt.connectWs(this.connOptions.timeoutMs);
|
|
264
|
+
const controller = new AbortController();
|
|
265
|
+
const sendTask = Task.from(({ signal }) => send(ws, signal), controller);
|
|
266
|
+
const wsListenerTask = Task.from(({ signal }) => createWsListener(ws, signal), controller);
|
|
267
|
+
const recvTask = Task.from(({ signal }) => recv(signal), controller);
|
|
268
|
+
const waitReconnectTask = Task.from(
|
|
269
|
+
({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),
|
|
270
|
+
controller
|
|
271
|
+
);
|
|
227
272
|
try {
|
|
228
273
|
await Promise.race([
|
|
229
|
-
Promise.all(
|
|
274
|
+
Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),
|
|
230
275
|
waitReconnectTask.result
|
|
231
276
|
]);
|
|
232
277
|
if (!waitReconnectTask.done) break;
|
|
233
278
|
this.reconnectEvent.clear();
|
|
234
279
|
} finally {
|
|
235
|
-
await cancelAndWait(
|
|
280
|
+
await cancelAndWait(
|
|
281
|
+
[sendTask, wsListenerTask, recvTask, waitReconnectTask],
|
|
282
|
+
DEFAULT_CANCEL_TIMEOUT
|
|
283
|
+
);
|
|
284
|
+
resourceCleanup();
|
|
236
285
|
}
|
|
237
286
|
} finally {
|
|
238
|
-
|
|
239
|
-
if (ws) ws.close();
|
|
240
|
-
} catch {
|
|
241
|
-
}
|
|
287
|
+
resourceCleanup();
|
|
242
288
|
}
|
|
243
289
|
}
|
|
244
290
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport type { WebSocket } from 'ws';\nimport { type RawData } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-general'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2-conversationalai'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-general'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia' | 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels = 'assemblyai' | 'assemblyai/universal-streaming';\n\nexport interface CartesiaOptions {\n min_volume?: number; // default: not specified\n max_silence_duration_secs?: number; // default: not specified\n}\n\nexport interface DeepgramOptions {\n filler_words?: boolean; // default: true\n interim_results?: boolean; // default: true\n endpointing?: number; // default: 25 (ms)\n punctuate?: boolean; // default: false\n smart_format?: boolean;\n keywords?: Array<[string, number]>;\n keyterms?: string[];\n profanity_filter?: boolean;\n numerals?: boolean;\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyAIOptions {\n format_turns?: boolean; // default: false\n end_of_turn_confidence_threshold?: number; // default: 0.01\n min_end_of_turn_silence_when_confident?: number; // default: 0\n max_turn_silence?: number; // default: not specified\n keyterms_prompt?: string[]; // default: not specified\n}\n\nexport type STTLanguages =\n | 'multi'\n | 'en'\n | 'de'\n | 'es'\n | 'fr'\n | 'ja'\n | 'pt'\n | 'zh'\n | 'hi'\n | AnyString;\n\ntype _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels;\n\nexport type STTModels = _STTModels | 'auto' | AnyString;\n\nexport type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;\n\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? AssemblyAIOptions\n : Record<string, unknown>;\n\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model?: TModel;\n language?: STTLanguages;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: STTOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference STT\n */\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts?: {\n model?: TModel;\n language?: STTLanguages;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: STTOptions<TModel>;\n }) {\n super({ streaming: true, interimResults: true });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n modelOptions = {} as STTOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n this.opts = {\n model,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n static fromModelString(modelString: string): STT<AnyString> {\n if (modelString.includes(':')) {\n const [model, language] = modelString.split(':') as [AnyString, STTLanguages];\n return new STT({ model, language });\n }\n return new STT({ model: modelString });\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const streamOpts = {\n ...this.opts,\n language: language ?? this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n }\n\n protected async run(): Promise<void> {\n let ws: WebSocket | null = null;\n let closingWs = false;\n\n this.reconnectEvent.set();\n\n const connect = async () => {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model && this.opts.model !== 'auto') {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, 10000);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n for await (const ev of this.input) {\n if (signal.aborted) break;\n let frames: AudioFrame[];\n\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closingWs = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n };\n\n const recv = async (socket: WebSocket, signal: AbortSignal) => {\n while (!this.closed && !signal.aborted) {\n const dataPromise = new Promise<string>((resolve, reject) => {\n const messageHandler = (d: RawData) => {\n resolve(d.toString());\n removeListeners();\n };\n const errorHandler = (e: Error) => {\n reject(e);\n removeListeners();\n };\n const closeHandler = (code: number) => {\n if (closingWs) {\n resolve('');\n } else {\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n }\n removeListeners();\n };\n const removeListeners = () => {\n socket.removeListener('message', messageHandler);\n socket.removeListener('error', errorHandler);\n socket.removeListener('close', closeHandler);\n };\n socket.once('message', messageHandler);\n socket.once('error', errorHandler);\n socket.once('close', closeHandler);\n });\n\n const data = await Promise.race([dataPromise, waitForAbort(signal)]);\n\n if (!data || signal.aborted) return;\n\n const json = JSON.parse(data);\n const type = json.type as string | undefined;\n\n switch (type) {\n case 'session.created':\n case 'session.finalized':\n case 'session.closed':\n break;\n case 'interim_transcript':\n this.processTranscript(json, false);\n break;\n case 'final_transcript':\n this.processTranscript(json, true);\n break;\n case 'error':\n this.#logger.error('received error from LiveKit STT: %o', json);\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);\n default:\n this.#logger.warn('received unexpected message from LiveKit STT: %o', json);\n break;\n }\n }\n };\n\n while (true) {\n try {\n ws = await connect();\n\n const sendTask = Task.from(async ({ signal }) => {\n await send(ws!, signal);\n });\n\n const recvTask = Task.from(async ({ signal }) => {\n await recv(ws!, signal);\n });\n\n const tasks = [sendTask, recvTask];\n const waitReconnectTask = Task.from(async ({ signal }) => {\n await Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]);\n });\n\n try {\n await Promise.race([\n Promise.all(tasks.map((task) => task.result)),\n waitReconnectTask.result,\n ]);\n\n if (!waitReconnectTask.done) break;\n this.reconnectEvent.clear();\n } finally {\n await cancelAndWait([sendTask, recvTask, waitReconnectTask], DEFAULT_CANCEL_TIMEOUT);\n }\n } finally {\n try {\n if (ws) ws.close();\n } catch {}\n }\n }\n }\n\n private processTranscript(data: Record<string, any>, isFinal: boolean) {\n const requestId = data.request_id ?? this.requestId;\n const text = data.transcript ?? '';\n const language = data.language ?? this.opts.language ?? 'en';\n\n if (!text && !isFinal) return;\n\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: data.start ?? 0,\n endTime: data.duration ?? 0,\n confidence: data.confidence ?? 1.0,\n text,\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n }\n}\n"],"mappings":"AAGA,eAAgC;AAEhC,eAA6B;AAC7B,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB;AAAA,EACE,OAAO;AAAA,EACP,gBAAgB;AAAA,EAGhB;AAAA,OACK;AACP,SAAiC,mCAAmC;AACpE,SAA2B,OAAO,MAAM,eAAe,WAAW,oBAAoB;AACtF,SAAyB,WAAW,yBAAyB;AAuE7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAgBxB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,UAAU,IAAI;AAAA,EAEd,YAAY,MAST;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,KAAK,CAAC;AAE/C,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAEA,SAAK,OAAO;AAAA,MACV;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,QAAQ,IAAI,YAAY,MAAM,GAAG;AAC/C,aAAO,IAAI,IAAI,EAAE,OAAO,SAAS,CAAC;AAAA,IACpC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAEpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAC5E,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,YAAY,KAAK,KAAK;AAAA,IAClC;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,iBAAiB;AAAA,EACnE;AAAA,EACA,YAAY,UAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,MAAM;AAAA,EAEnC,UAAU,IAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAAA,EACtC;AAAA,EAEA,MAAgB,MAAqB;AACnC,QAAI,KAAuB;AAC3B,QAAI,YAAY;AAEhB,SAAK,eAAe,IAAI;AAExB,UAAM,UAAU,YAAY;AAC1B,YAAM,SAAS;AAAA,QACb,UAAU;AAAA,UACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,UACxC,UAAU,KAAK,KAAK;AAAA,UACpB,OAAO,KAAK,KAAK;AAAA,QACnB;AAAA,MACF;AAEA,UAAI,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,QAAQ;AACjD,eAAO,QAAQ,KAAK,KAAK;AAAA,MAC3B;AAEA,UAAI,KAAK,KAAK,UAAU;AACtB,QAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,MACpE;AAEA,UAAI,UAAU,KAAK,KAAK;AACxB,UAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,kBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,MACxC;AAEA,YAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,YAAM,MAAM,GAAG,OAAO;AACtB,YAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,YAAM,SAAS,MAAM,UAAU,KAAK,SAAS,GAAK;AAClD,YAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,aAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,aAAO;AAAA,IACT;AAEA,UAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,YAAM,cAAc,IAAI;AAAA,QACtB,KAAK,KAAK;AAAA,QACV;AAAA,QACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,MACtC;AAEA,uBAAiB,MAAM,KAAK,OAAO;AACjC,YAAI,OAAO,QAAS;AACpB,YAAI;AAEJ,YAAI,OAAO,aAAa,gBAAgB;AACtC,mBAAS,YAAY,MAAM;AAAA,QAC7B,OAAO;AACL,gBAAM,QAAQ;AACd,mBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,QAC9D;AAEA,mBAAW,SAAS,QAAQ;AAC1B,eAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,gBAAM,SAAS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,gBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,iBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,QACjC;AAAA,MACF;AAEA,kBAAY;AACZ,aAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,IAC1D;AAEA,UAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,aAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,cAAM,cAAc,IAAI,QAAgB,CAAC,SAAS,WAAW;AAC3D,gBAAM,iBAAiB,CAAC,MAAe;AACrC,oBAAQ,EAAE,SAAS,CAAC;AACpB,4BAAgB;AAAA,UAClB;AACA,gBAAM,eAAe,CAAC,MAAa;AACjC,mBAAO,CAAC;AACR,4BAAgB;AAAA,UAClB;AACA,gBAAM,eAAe,CAAC,SAAiB;AACrC,gBAAI,WAAW;AACb,sBAAQ,EAAE;AAAA,YACZ,OAAO;AACL;AAAA,gBACE,IAAI,eAAe;AAAA,kBACjB,SAAS;AAAA,kBACT,SAAS,EAAE,YAAY,KAAK;AAAA,gBAC9B,CAAC;AAAA,cACH;AAAA,YACF;AACA,4BAAgB;AAAA,UAClB;AACA,gBAAM,kBAAkB,MAAM;AAC5B,mBAAO,eAAe,WAAW,cAAc;AAC/C,mBAAO,eAAe,SAAS,YAAY;AAC3C,mBAAO,eAAe,SAAS,YAAY;AAAA,UAC7C;AACA,iBAAO,KAAK,WAAW,cAAc;AACrC,iBAAO,KAAK,SAAS,YAAY;AACjC,iBAAO,KAAK,SAAS,YAAY;AAAA,QACnC,CAAC;AAED,cAAM,OAAO,MAAM,QAAQ,KAAK,CAAC,aAAa,aAAa,MAAM,CAAC,CAAC;AAEnE,YAAI,CAAC,QAAQ,OAAO,QAAS;AAE7B,cAAM,OAAO,KAAK,MAAM,IAAI;AAC5B,cAAM,OAAO,KAAK;AAElB,gBAAQ,MAAM;AAAA,UACZ,KAAK;AAAA,UACL,KAAK;AAAA,UACL,KAAK;AACH;AAAA,UACF,KAAK;AACH,iBAAK,kBAAkB,MAAM,KAAK;AAClC;AAAA,UACF,KAAK;AACH,iBAAK,kBAAkB,MAAM,IAAI;AACjC;AAAA,UACF,KAAK;AACH,iBAAK,QAAQ,MAAM,uCAAuC,IAAI;AAC9D,kBAAM,IAAI,SAAS,+BAA+B,KAAK,UAAU,IAAI,CAAC,EAAE;AAAA,UAC1E;AACE,iBAAK,QAAQ,KAAK,oDAAoD,IAAI;AAC1E;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,WAAO,MAAM;AACX,UAAI;AACF,aAAK,MAAM,QAAQ;AAEnB,cAAM,WAAW,KAAK,KAAK,OAAO,EAAE,OAAO,MAAM;AAC/C,gBAAM,KAAK,IAAK,MAAM;AAAA,QACxB,CAAC;AAED,cAAM,WAAW,KAAK,KAAK,OAAO,EAAE,OAAO,MAAM;AAC/C,gBAAM,KAAK,IAAK,MAAM;AAAA,QACxB,CAAC;AAED,cAAM,QAAQ,CAAC,UAAU,QAAQ;AACjC,cAAM,oBAAoB,KAAK,KAAK,OAAO,EAAE,OAAO,MAAM;AACxD,gBAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,QACvE,CAAC;AAED,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,MAAM,IAAI,CAAC,SAAS,KAAK,MAAM,CAAC;AAAA,YAC5C,kBAAkB;AAAA,UACpB,CAAC;AAED,cAAI,CAAC,kBAAkB,KAAM;AAC7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AACA,gBAAM,cAAc,CAAC,UAAU,UAAU,iBAAiB,GAAG,sBAAsB;AAAA,QACrF;AAAA,MACF,UAAE;AACA,YAAI;AACF,cAAI,GAAI,IAAG,MAAM;AAAA,QACnB,QAAQ;AAAA,QAAC;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA2B,SAAkB;AACrE,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK,cAAc;AAChC,UAAM,WAAW,KAAK,YAAY,KAAK,KAAK,YAAY;AAExD,QAAI,CAAC,QAAQ,CAAC,QAAS;AAGvB,QAAI,CAAC,KAAK,UAAU;AAClB,WAAK,WAAW;AAChB,WAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AAAA,IAC1D;AAEA,UAAM,aAAyB;AAAA,MAC7B;AAAA,MACA,WAAW,KAAK,SAAS;AAAA,MACzB,SAAS,KAAK,YAAY;AAAA,MAC1B,YAAY,KAAK,cAAc;AAAA,MAC/B;AAAA,IACF;AAEA,QAAI,SAAS;AACX,UAAI,KAAK,iBAAiB,GAAG;AAC3B,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,QACzD,CAAC;AACD,aAAK,iBAAiB;AAAA,MACxB;AAEA,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,gBAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAED,UAAI,KAAK,UAAU;AACjB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAAA,MACxD;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,gBAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport type { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-general'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2-conversationalai'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-general'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia' | 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels = 'assemblyai' | 'assemblyai/universal-streaming';\n\nexport interface CartesiaOptions {\n min_volume?: number; // default: not specified\n max_silence_duration_secs?: number; // default: not specified\n}\n\nexport interface DeepgramOptions {\n filler_words?: boolean; // default: true\n interim_results?: boolean; // default: true\n endpointing?: number; // default: 25 (ms)\n punctuate?: boolean; // default: false\n smart_format?: boolean;\n keywords?: Array<[string, number]>;\n keyterms?: string[];\n profanity_filter?: boolean;\n numerals?: boolean;\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyAIOptions {\n format_turns?: boolean; // default: false\n end_of_turn_confidence_threshold?: number; // default: 0.01\n min_end_of_turn_silence_when_confident?: number; // default: 0\n max_turn_silence?: number; // default: not specified\n keyterms_prompt?: string[]; // default: not specified\n}\n\nexport type STTLanguages =\n | 'multi'\n | 'en'\n | 'de'\n | 'es'\n | 'fr'\n | 'ja'\n | 'pt'\n | 'zh'\n | 'hi'\n | AnyString;\n\ntype _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels;\n\nexport type STTModels = _STTModels | 'auto' | AnyString;\n\nexport type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;\n\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? AssemblyAIOptions\n : Record<string, unknown>;\n\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model?: TModel;\n language?: STTLanguages;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: STTOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference STT\n */\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts?: {\n model?: TModel;\n language?: STTLanguages;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: STTOptions<TModel>;\n }) {\n super({ streaming: true, interimResults: true });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n modelOptions = {} as STTOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n this.opts = {\n model,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n static fromModelString(modelString: string): STT<AnyString> {\n if (modelString.includes(':')) {\n const [model, language] = modelString.split(':') as [AnyString, STTLanguages];\n return new STT({ model, language });\n }\n return new STT({ model: modelString });\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const streamOpts = {\n ...this.opts,\n language: language ?? this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model && this.opts.model !== 'auto') {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, timeout);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n private stt: STT<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n this.stt = sttImpl;\n this.connOptions = connOptions;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n this.reconnectEvent.set();\n }\n\n protected async run(): Promise<void> {\n while (true) {\n // Create fresh resources for each connection attempt\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n\n type SttServerEvent = Record<string, any>;\n const eventChannel = createStreamChannel<SttServerEvent>();\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {\n return new Promise<void>((resolve, reject) => {\n const onAbort = () => {\n resourceCleanup();\n reject(new Error('WebSocket connection aborted'));\n };\n\n signal.addEventListener('abort', onAbort, { once: true });\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString()) as SttServerEvent;\n eventChannel.write(json);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', (code: number) => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n });\n });\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n // Create abort promise once to avoid memory leak\n const abortPromise = new Promise<never>((_, reject) => {\n if (signal.aborted) {\n return reject(new Error('Send aborted'));\n }\n const onAbort = () => reject(new Error('Send aborted'));\n signal.addEventListener('abort', onAbort, { once: true });\n });\n\n // Manual iteration to support cancellation\n const iterator = this.input[Symbol.asyncIterator]();\n try {\n while (true) {\n const result = await Promise.race([iterator.next(), abortPromise]);\n\n if (result.done) break;\n const ev = result.value;\n\n let frames: AudioFrame[];\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closing = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n } catch (e) {\n if ((e as Error).message === 'Send aborted') {\n // Expected abort, don't log\n return;\n }\n throw e;\n }\n };\n\n const recv = async (signal: AbortSignal) => {\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n const json = result.value;\n const type = json.type as string | undefined;\n\n switch (type) {\n case 'session.created':\n case 'session.finalized':\n break;\n case 'session.closed':\n finalReceived = true;\n resourceCleanup();\n break;\n case 'interim_transcript':\n this.processTranscript(json, false);\n break;\n case 'final_transcript':\n this.processTranscript(json, true);\n break;\n case 'error':\n this.#logger.error({ error: json }, 'Received error from LiveKit STT');\n resourceCleanup();\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);\n default:\n this.#logger.warn(\n { message: json },\n 'Received unexpected message from LiveKit STT',\n );\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.stt.connectWs(this.connOptions.timeoutMs);\n\n // Wrap tasks for proper cancellation support using Task signals\n const controller = new AbortController();\n const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);\n const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);\n const recvTask = Task.from(({ signal }) => recv(signal), controller);\n const waitReconnectTask = Task.from(\n ({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),\n controller,\n );\n\n try {\n await Promise.race([\n Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),\n waitReconnectTask.result,\n ]);\n\n // If reconnect didn't trigger, tasks finished - exit loop\n if (!waitReconnectTask.done) break;\n\n // Reconnect triggered - clear event and continue loop\n this.reconnectEvent.clear();\n } finally {\n // Cancel all tasks to ensure cleanup\n await cancelAndWait(\n [sendTask, wsListenerTask, recvTask, waitReconnectTask],\n DEFAULT_CANCEL_TIMEOUT,\n );\n resourceCleanup();\n }\n } finally {\n // Ensure cleanup even if connectWs throws\n resourceCleanup();\n }\n }\n }\n\n private processTranscript(data: Record<string, any>, isFinal: boolean) {\n const requestId = data.request_id ?? this.requestId;\n const text = data.transcript ?? '';\n const language = data.language ?? this.opts.language ?? 'en';\n\n if (!text && !isFinal) return;\n\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: data.start ?? 0,\n endTime: data.duration ?? 0,\n confidence: data.confidence ?? 1.0,\n text,\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n }\n}\n"],"mappings":"AAGA,eAAgC;AAEhC,SAAS,UAAU,sBAAsB;AACzC,SAAS,uBAAuB;AAChC,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC;AAAA,EACE,OAAO;AAAA,EACP,gBAAgB;AAAA,EAGhB;AAAA,OACK;AACP,SAAiC,mCAAmC;AACpE,SAA2B,OAAO,MAAM,eAAe,WAAW,oBAAoB;AACtF,SAAyB,WAAW,yBAAyB;AAuE7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAgBxB,MAAM,YAAsC,QAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,UAAU,IAAI;AAAA,EAEd,YAAY,MAST;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,KAAK,CAAC;AAE/C,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAEA,SAAK,OAAO;AAAA,MACV;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,QAAQ,IAAI,YAAY,MAAM,GAAG;AAC/C,aAAO,IAAI,IAAI,EAAE,OAAO,SAAS,CAAC;AAAA,IACpC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAEpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,4BAA4B,IAAI,WAAW,CAAC;AAC5E,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,YAAY,KAAK,KAAK;AAAA,IAClC;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,UAAM,SAAS;AAAA,MACb,UAAU;AAAA,QACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,QACxC,UAAU,KAAK,KAAK;AAAA,QACpB,OAAO,KAAK,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,QAAQ;AACjD,aAAO,QAAQ,KAAK,KAAK;AAAA,IAC3B;AAEA,QAAI,KAAK,KAAK,UAAU;AACtB,MAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,IACpE;AAEA,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,MAAM,kBAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS,MAAM,UAAU,KAAK,SAAS,OAAO;AACpD,UAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,WAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,iBAAiB;AAAA,EACnE;AAAA,EACA,YAAY,UAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,MAAM;AAAA,EAC3B;AAAA,EACA;AAAA,EAER,UAAU,IAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,SAAK,eAAe,IAAI;AAAA,EAC1B;AAAA,EAEA,MAAgB,MAAqB;AACnC,WAAO,MAAM;AAEX,UAAI,KAAuB;AAC3B,UAAI,UAAU;AACd,UAAI,gBAAgB;AAGpB,YAAM,eAAe,oBAAoC;AAEzD,YAAM,kBAAkB,MAAM;AAC5B,YAAI,QAAS;AACb,kBAAU;AACV,qBAAa,MAAM;AACnB,iCAAI;AACJ,iCAAI;AAAA,MACN;AAEA,YAAM,mBAAmB,OAAOA,KAAe,WAAwB;AACrE,eAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,gBAAM,UAAU,MAAM;AACpB,4BAAgB;AAChB,mBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,UAClD;AAEA,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAExD,UAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,kBAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,yBAAa,MAAM,IAAI;AAAA,UACzB,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,iBAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,4BAAgB;AAChB,mBAAO,CAAC;AAAA,UACV,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,SAAiB;AAC/B,4BAAgB;AAEhB,gBAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,gBAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,cACE,IAAI,eAAe;AAAA,gBACjB,SAAS;AAAA,gBACT,SAAS,EAAE,YAAY,KAAK;AAAA,cAC9B,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH,CAAC;AAAA,MACH;AAEA,YAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,cAAM,cAAc,IAAI;AAAA,UACtB,KAAK,KAAK;AAAA,UACV;AAAA,UACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,QACtC;AAGA,cAAM,eAAe,IAAI,QAAe,CAAC,GAAG,WAAW;AACrD,cAAI,OAAO,SAAS;AAClB,mBAAO,OAAO,IAAI,MAAM,cAAc,CAAC;AAAA,UACzC;AACA,gBAAM,UAAU,MAAM,OAAO,IAAI,MAAM,cAAc,CAAC;AACtD,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,QAC1D,CAAC;AAGD,cAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAClD,YAAI;AACF,iBAAO,MAAM;AACX,kBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,SAAS,KAAK,GAAG,YAAY,CAAC;AAEjE,gBAAI,OAAO,KAAM;AACjB,kBAAM,KAAK,OAAO;AAElB,gBAAI;AACJ,gBAAI,OAAO,aAAa,gBAAgB;AACtC,uBAAS,YAAY,MAAM;AAAA,YAC7B,OAAO;AACL,oBAAM,QAAQ;AACd,uBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,YAC9D;AAEA,uBAAW,SAAS,QAAQ;AAC1B,mBAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,oBAAM,SAAS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,oBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,qBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,YACjC;AAAA,UACF;AAEA,oBAAU;AACV,iBAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,QAC1D,SAAS,GAAG;AACV,cAAK,EAAY,YAAY,gBAAgB;AAE3C;AAAA,UACF;AACA,gBAAM;AAAA,QACR;AAAA,MACF;AAEA,YAAM,OAAO,OAAO,WAAwB;AAC1C,cAAM,oBAAoB,aAAa,OAAO;AAC9C,cAAM,SAAS,kBAAkB,UAAU;AAE3C,YAAI;AACF,iBAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,kBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,gBAAI,OAAO,QAAS;AACpB,gBAAI,OAAO,KAAM;AAEjB,kBAAM,OAAO,OAAO;AACpB,kBAAM,OAAO,KAAK;AAElB,oBAAQ,MAAM;AAAA,cACZ,KAAK;AAAA,cACL,KAAK;AACH;AAAA,cACF,KAAK;AACH,gCAAgB;AAChB,gCAAgB;AAChB;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,MAAM,KAAK;AAClC;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,MAAM,IAAI;AACjC;AAAA,cACF,KAAK;AACH,qBAAK,QAAQ,MAAM,EAAE,OAAO,KAAK,GAAG,iCAAiC;AACrE,gCAAgB;AAChB,sBAAM,IAAI,SAAS,+BAA+B,KAAK,UAAU,IAAI,CAAC,EAAE;AAAA,cAC1E;AACE,qBAAK,QAAQ;AAAA,kBACX,EAAE,SAAS,KAAK;AAAA,kBAChB;AAAA,gBACF;AACA;AAAA,YACJ;AAAA,UACF;AAAA,QACF,UAAE;AACA,iBAAO,YAAY;AACnB,cAAI;AACF,kBAAM,kBAAkB,OAAO;AAAA,UACjC,SAAS,GAAG;AACV,iBAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,UACxF;AAAA,QACF;AAAA,MACF;AAEA,UAAI;AACF,aAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAGxD,cAAM,aAAa,IAAI,gBAAgB;AACvC,cAAM,WAAW,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,IAAK,MAAM,GAAG,UAAU;AACxE,cAAM,iBAAiB,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,iBAAiB,IAAK,MAAM,GAAG,UAAU;AAC1F,cAAM,WAAW,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,MAAM,GAAG,UAAU;AACnE,cAAM,oBAAoB,KAAK;AAAA,UAC7B,CAAC,EAAE,OAAO,MAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,GAAG,aAAa,MAAM,CAAC,CAAC;AAAA,UAC/E;AAAA,QACF;AAEA,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,CAAC,SAAS,QAAQ,eAAe,QAAQ,SAAS,MAAM,CAAC;AAAA,YACrE,kBAAkB;AAAA,UACpB,CAAC;AAGD,cAAI,CAAC,kBAAkB,KAAM;AAG7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AAEA,gBAAM;AAAA,YACJ,CAAC,UAAU,gBAAgB,UAAU,iBAAiB;AAAA,YACtD;AAAA,UACF;AACA,0BAAgB;AAAA,QAClB;AAAA,MACF,UAAE;AAEA,wBAAgB;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA2B,SAAkB;AACrE,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK,cAAc;AAChC,UAAM,WAAW,KAAK,YAAY,KAAK,KAAK,YAAY;AAExD,QAAI,CAAC,QAAQ,CAAC,QAAS;AAGvB,QAAI,CAAC,KAAK,UAAU;AAClB,WAAK,WAAW;AAChB,WAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AAAA,IAC1D;AAEA,UAAM,aAAyB;AAAA,MAC7B;AAAA,MACA,WAAW,KAAK,SAAS;AAAA,MACzB,SAAS,KAAK,YAAY;AAAA,MAC1B,YAAY,KAAK,cAAc;AAAA,MAC/B;AAAA,IACF;AAEA,QAAI,SAAS;AACX,UAAI,KAAK,iBAAiB,GAAG;AAC3B,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,gBAAgB;AAAA,UACtB;AAAA,UACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,QACzD,CAAC;AACD,aAAK,iBAAiB;AAAA,MACxB;AAEA,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,gBAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAED,UAAI,KAAK,UAAU;AACjB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAAA,MACxD;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,gBAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAAA,IACH;AAAA,EACF;AACF;","names":["ws"]}
|
package/dist/stt/stt.cjs
CHANGED
package/dist/stt/stt.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAA8D;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAmFL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAQF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAA8D;AAGvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAmFL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EASA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
package/dist/stt/stt.d.cts
CHANGED
package/dist/stt/stt.d.ts
CHANGED
package/dist/stt/stt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,KAAK,UAAU,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAItD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAE5E,2CAA2C;AAC3C,oBAAY,eAAe;IACzB;;;;OAIG;IACH,eAAe,IAAI;IACnB;;OAEG;IACH,kBAAkB,IAAI;IACtB;;;OAGG;IACH,gBAAgB,IAAI;IACpB;;;OAGG;IACH,aAAa,IAAI;IACjB,mEAAmE;IACnE,iBAAiB,IAAI;IACrB;;;;OAIG;IACH,oBAAoB,IAAI;CACzB;AAED,mEAAmE;AACnE,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,CAAC;IACtB,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,UAAU,EAAE,CAAC,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IACtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGX,YAAY,EAAE,eAAe;IAKzC,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,8FAA8F;IACxF,SAAS,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAezD,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAEvE;;;OAGG;IACH,QAAQ,CAAC,MAAM,IAAI,YAAY;
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/stt/stt.ts"],"names":[],"mappings":";AAGA,OAAO,EAAE,KAAK,UAAU,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACpE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAItD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAErD,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAA6B,MAAM,aAAa,CAAC;AAE5E,2CAA2C;AAC3C,oBAAY,eAAe;IACzB;;;;OAIG;IACH,eAAe,IAAI;IACnB;;OAEG;IACH,kBAAkB,IAAI;IACtB;;;OAGG;IACH,gBAAgB,IAAI;IACpB;;;OAGG;IACH,aAAa,IAAI;IACjB,mEAAmE;IACnE,iBAAiB,IAAI;IACrB;;;;OAIG;IACH,oBAAoB,IAAI;CACzB;AAED,mEAAmE;AACnE,MAAM,WAAW,UAAU;IACzB,QAAQ,EAAE,MAAM,CAAC;IACjB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,sDAAsD;AACtD,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,eAAe,CAAC;IACtB,YAAY,CAAC,EAAE,CAAC,UAAU,EAAE,GAAG,UAAU,EAAE,CAAC,CAAC;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,gBAAgB,CAAC,EAAE,gBAAgB,CAAC;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,OAAO,CAAC;IACnB,cAAc,EAAE,OAAO,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,KAAK,CAAC;IACb,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;IACrD,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CACtC,CAAC;kCAS2D,aAAa,YAAY,CAAC;AAPvF;;;;;;GAMG;AACH,8BAAsB,GAAI,SAAQ,QAAsD;;IACtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAGX,YAAY,EAAE,eAAe;IAKzC,sCAAsC;IACtC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED,8FAA8F;IACxF,SAAS,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAezD,SAAS,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAEvE;;;OAGG;IACH,QAAQ,CAAC,MAAM,IAAI,YAAY;IAEzB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED;;;;;;;;;;;;;;;GAeG;AACH,8BAAsB,YAAa,YAAW,qBAAqB,CAAC,WAAW,CAAC;;IAC9E,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,sEAA6E;IAC5F,SAAS,CAAC,MAAM,kCAAyC;IACzD,SAAS,CAAC,KAAK,kCAAyC;IACxD,SAAS,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IACpC,SAAS,CAAC,SAAS,CAAC,EAAE,cAAc,CAAC;IACrC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,SAAS,CAAC,MAAM,UAAS;IAEzB,OAAO,CAAC,mBAAmB,CAAqC;IAChE,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,YAAY,CAAoB;gBAGtC,GAAG,EAAE,GAAG,EACR,UAAU,CAAC,EAAE,MAAM,EACnB,iBAAiB,GAAE,iBAA+C;YAgBtD,QAAQ;IAqCtB,OAAO,CAAC,SAAS;cAUD,SAAS;cAkBT,cAAc;IAkB9B,SAAS,CAAC,QAAQ,CAAC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAEvC,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,qCAAqC;IACrC,SAAS,CAAC,KAAK,EAAE,UAAU;IAwB3B,4DAA4D;IAC5D,KAAK;IAUL,2DAA2D;IAC3D,QAAQ;IAUR,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;IAI5C,wDAAwD;IACxD,KAAK;IAOL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,YAAY;CAGvC"}
|