@livekit/agents 1.0.20 → 1.0.22
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/inference/stt.cjs +213 -155
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +4 -0
- package/dist/inference/stt.d.ts +4 -0
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +213 -155
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +2 -3
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +2 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/stt/stt.cjs +18 -5
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +18 -5
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +16 -16
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +16 -16
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +15 -22
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +0 -1
- package/dist/utils.d.ts +0 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +15 -21
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +2 -1
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +3 -10
- package/dist/utils.test.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/stt.ts +249 -169
- package/src/inference/tts.ts +4 -3
- package/src/stt/stt.ts +18 -5
- package/src/tts/tts.ts +24 -16
- package/src/utils.test.ts +3 -10
- package/src/utils.ts +21 -27
package/src/inference/stt.ts
CHANGED
|
@@ -3,10 +3,10 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { type AudioFrame } from '@livekit/rtc-node';
|
|
5
5
|
import type { WebSocket } from 'ws';
|
|
6
|
-
import { type RawData } from 'ws';
|
|
7
6
|
import { APIError, APIStatusError } from '../_exceptions.js';
|
|
8
7
|
import { AudioByteStream } from '../audio.js';
|
|
9
8
|
import { log } from '../log.js';
|
|
9
|
+
import { createStreamChannel } from '../stream/stream_channel.js';
|
|
10
10
|
import {
|
|
11
11
|
STT as BaseSTT,
|
|
12
12
|
SpeechStream as BaseSpeechStream,
|
|
@@ -198,6 +198,39 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
198
198
|
|
|
199
199
|
return stream;
|
|
200
200
|
}
|
|
201
|
+
|
|
202
|
+
async connectWs(timeout: number): Promise<WebSocket> {
|
|
203
|
+
const params = {
|
|
204
|
+
settings: {
|
|
205
|
+
sample_rate: String(this.opts.sampleRate),
|
|
206
|
+
encoding: this.opts.encoding,
|
|
207
|
+
extra: this.opts.modelOptions,
|
|
208
|
+
},
|
|
209
|
+
} as Record<string, unknown>;
|
|
210
|
+
|
|
211
|
+
if (this.opts.model && this.opts.model !== 'auto') {
|
|
212
|
+
params.model = this.opts.model;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
if (this.opts.language) {
|
|
216
|
+
(params.settings as Record<string, unknown>).language = this.opts.language;
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
let baseURL = this.opts.baseURL;
|
|
220
|
+
if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {
|
|
221
|
+
baseURL = baseURL.replace('http', 'ws');
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);
|
|
225
|
+
const url = `${baseURL}/stt`;
|
|
226
|
+
const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;
|
|
227
|
+
|
|
228
|
+
const socket = await connectWs(url, headers, timeout);
|
|
229
|
+
const msg = { ...params, type: 'session.create' };
|
|
230
|
+
socket.send(JSON.stringify(msg));
|
|
231
|
+
|
|
232
|
+
return socket;
|
|
233
|
+
}
|
|
201
234
|
}
|
|
202
235
|
|
|
203
236
|
export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
@@ -206,6 +239,8 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
206
239
|
private speaking = false;
|
|
207
240
|
private speechDuration = 0;
|
|
208
241
|
private reconnectEvent = new Event();
|
|
242
|
+
private stt: STT<TModel>;
|
|
243
|
+
private connOptions: APIConnectOptions;
|
|
209
244
|
|
|
210
245
|
#logger = log();
|
|
211
246
|
|
|
@@ -216,6 +251,8 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
216
251
|
) {
|
|
217
252
|
super(sttImpl, opts.sampleRate, connOptions);
|
|
218
253
|
this.opts = opts;
|
|
254
|
+
this.stt = sttImpl;
|
|
255
|
+
this.connOptions = connOptions;
|
|
219
256
|
}
|
|
220
257
|
|
|
221
258
|
get label(): string {
|
|
@@ -224,222 +261,265 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
224
261
|
|
|
225
262
|
updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
|
|
226
263
|
this.opts = { ...this.opts, ...opts };
|
|
264
|
+
this.reconnectEvent.set();
|
|
227
265
|
}
|
|
228
266
|
|
|
229
267
|
protected async run(): Promise<void> {
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
268
|
+
while (true) {
|
|
269
|
+
// Create fresh resources for each connection attempt
|
|
270
|
+
let ws: WebSocket | null = null;
|
|
271
|
+
let closing = false;
|
|
272
|
+
let finalReceived = false;
|
|
273
|
+
|
|
274
|
+
type SttServerEvent = Record<string, any>;
|
|
275
|
+
const eventChannel = createStreamChannel<SttServerEvent>();
|
|
276
|
+
|
|
277
|
+
const resourceCleanup = () => {
|
|
278
|
+
if (closing) return;
|
|
279
|
+
closing = true;
|
|
280
|
+
eventChannel.close();
|
|
281
|
+
ws?.removeAllListeners();
|
|
282
|
+
ws?.close();
|
|
283
|
+
};
|
|
284
|
+
|
|
285
|
+
const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {
|
|
286
|
+
return new Promise<void>((resolve, reject) => {
|
|
287
|
+
const onAbort = () => {
|
|
288
|
+
resourceCleanup();
|
|
289
|
+
reject(new Error('WebSocket connection aborted'));
|
|
290
|
+
};
|
|
247
291
|
|
|
248
|
-
|
|
249
|
-
(params.settings as Record<string, unknown>).language = this.opts.language;
|
|
250
|
-
}
|
|
292
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
251
293
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
294
|
+
ws.on('message', (data) => {
|
|
295
|
+
const json = JSON.parse(data.toString()) as SttServerEvent;
|
|
296
|
+
eventChannel.write(json);
|
|
297
|
+
});
|
|
256
298
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
299
|
+
ws.on('error', (e) => {
|
|
300
|
+
this.#logger.error({ error: e }, 'WebSocket error');
|
|
301
|
+
resourceCleanup();
|
|
302
|
+
reject(e);
|
|
303
|
+
});
|
|
260
304
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
socket.send(JSON.stringify(msg));
|
|
305
|
+
ws.on('close', (code: number) => {
|
|
306
|
+
resourceCleanup();
|
|
264
307
|
|
|
265
|
-
|
|
266
|
-
|
|
308
|
+
if (!closing) return this.#logger.error('WebSocket closed unexpectedly');
|
|
309
|
+
if (finalReceived) return resolve();
|
|
267
310
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
311
|
+
reject(
|
|
312
|
+
new APIStatusError({
|
|
313
|
+
message: 'LiveKit STT connection closed unexpectedly',
|
|
314
|
+
options: { statusCode: code },
|
|
315
|
+
}),
|
|
316
|
+
);
|
|
317
|
+
});
|
|
318
|
+
});
|
|
319
|
+
};
|
|
320
|
+
|
|
321
|
+
const send = async (socket: WebSocket, signal: AbortSignal) => {
|
|
322
|
+
const audioStream = new AudioByteStream(
|
|
323
|
+
this.opts.sampleRate,
|
|
324
|
+
1,
|
|
325
|
+
Math.floor(this.opts.sampleRate / 20), // 50ms
|
|
326
|
+
);
|
|
327
|
+
|
|
328
|
+
// Create abort promise once to avoid memory leak
|
|
329
|
+
const abortPromise = new Promise<never>((_, reject) => {
|
|
330
|
+
if (signal.aborted) {
|
|
331
|
+
return reject(new Error('Send aborted'));
|
|
332
|
+
}
|
|
333
|
+
const onAbort = () => reject(new Error('Send aborted'));
|
|
334
|
+
signal.addEventListener('abort', onAbort, { once: true });
|
|
335
|
+
});
|
|
285
336
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
}
|
|
292
|
-
}
|
|
337
|
+
// Manual iteration to support cancellation
|
|
338
|
+
const iterator = this.input[Symbol.asyncIterator]();
|
|
339
|
+
try {
|
|
340
|
+
while (true) {
|
|
341
|
+
const result = await Promise.race([iterator.next(), abortPromise]);
|
|
293
342
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
};
|
|
343
|
+
if (result.done) break;
|
|
344
|
+
const ev = result.value;
|
|
297
345
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
const messageHandler = (d: RawData) => {
|
|
302
|
-
resolve(d.toString());
|
|
303
|
-
removeListeners();
|
|
304
|
-
};
|
|
305
|
-
const errorHandler = (e: Error) => {
|
|
306
|
-
reject(e);
|
|
307
|
-
removeListeners();
|
|
308
|
-
};
|
|
309
|
-
const closeHandler = (code: number) => {
|
|
310
|
-
if (closingWs) {
|
|
311
|
-
resolve('');
|
|
346
|
+
let frames: AudioFrame[];
|
|
347
|
+
if (ev === SpeechStream.FLUSH_SENTINEL) {
|
|
348
|
+
frames = audioStream.flush();
|
|
312
349
|
} else {
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
message: 'LiveKit STT connection closed unexpectedly',
|
|
316
|
-
options: { statusCode: code },
|
|
317
|
-
}),
|
|
318
|
-
);
|
|
350
|
+
const frame = ev as AudioFrame;
|
|
351
|
+
frames = audioStream.write(new Int16Array(frame.data).buffer);
|
|
319
352
|
}
|
|
320
|
-
removeListeners();
|
|
321
|
-
};
|
|
322
|
-
const removeListeners = () => {
|
|
323
|
-
socket.removeListener('message', messageHandler);
|
|
324
|
-
socket.removeListener('error', errorHandler);
|
|
325
|
-
socket.removeListener('close', closeHandler);
|
|
326
|
-
};
|
|
327
|
-
socket.once('message', messageHandler);
|
|
328
|
-
socket.once('error', errorHandler);
|
|
329
|
-
socket.once('close', closeHandler);
|
|
330
|
-
});
|
|
331
353
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
this.processTranscript(json, true);
|
|
349
|
-
break;
|
|
350
|
-
case 'error':
|
|
351
|
-
this.#logger.error('received error from LiveKit STT: %o', json);
|
|
352
|
-
throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);
|
|
353
|
-
default:
|
|
354
|
-
this.#logger.warn('received unexpected message from LiveKit STT: %o', json);
|
|
355
|
-
break;
|
|
354
|
+
for (const frame of frames) {
|
|
355
|
+
this.speechDuration += frame.samplesPerChannel / frame.sampleRate;
|
|
356
|
+
const base64 = Buffer.from(frame.data.buffer).toString('base64');
|
|
357
|
+
const msg = { type: 'input_audio', audio: base64 };
|
|
358
|
+
socket.send(JSON.stringify(msg));
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
closing = true;
|
|
363
|
+
socket.send(JSON.stringify({ type: 'session.finalize' }));
|
|
364
|
+
} catch (e) {
|
|
365
|
+
if ((e as Error).message === 'Send aborted') {
|
|
366
|
+
// Expected abort, don't log
|
|
367
|
+
return;
|
|
368
|
+
}
|
|
369
|
+
throw e;
|
|
356
370
|
}
|
|
357
|
-
}
|
|
358
|
-
};
|
|
359
|
-
|
|
360
|
-
while (true) {
|
|
361
|
-
try {
|
|
362
|
-
ws = await connect();
|
|
371
|
+
};
|
|
363
372
|
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
373
|
+
const recv = async (signal: AbortSignal) => {
|
|
374
|
+
const serverEventStream = eventChannel.stream();
|
|
375
|
+
const reader = serverEventStream.getReader();
|
|
367
376
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
377
|
+
try {
|
|
378
|
+
while (!this.closed && !signal.aborted) {
|
|
379
|
+
const result = await reader.read();
|
|
380
|
+
if (signal.aborted) return;
|
|
381
|
+
if (result.done) return;
|
|
382
|
+
|
|
383
|
+
const json = result.value;
|
|
384
|
+
const type = json.type as string | undefined;
|
|
385
|
+
|
|
386
|
+
switch (type) {
|
|
387
|
+
case 'session.created':
|
|
388
|
+
case 'session.finalized':
|
|
389
|
+
break;
|
|
390
|
+
case 'session.closed':
|
|
391
|
+
finalReceived = true;
|
|
392
|
+
resourceCleanup();
|
|
393
|
+
break;
|
|
394
|
+
case 'interim_transcript':
|
|
395
|
+
this.processTranscript(json, false);
|
|
396
|
+
break;
|
|
397
|
+
case 'final_transcript':
|
|
398
|
+
this.processTranscript(json, true);
|
|
399
|
+
break;
|
|
400
|
+
case 'error':
|
|
401
|
+
this.#logger.error({ error: json }, 'Received error from LiveKit STT');
|
|
402
|
+
resourceCleanup();
|
|
403
|
+
throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);
|
|
404
|
+
default:
|
|
405
|
+
this.#logger.warn(
|
|
406
|
+
{ message: json },
|
|
407
|
+
'Received unexpected message from LiveKit STT',
|
|
408
|
+
);
|
|
409
|
+
break;
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
} finally {
|
|
413
|
+
reader.releaseLock();
|
|
414
|
+
try {
|
|
415
|
+
await serverEventStream.cancel();
|
|
416
|
+
} catch (e) {
|
|
417
|
+
this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
};
|
|
371
421
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
422
|
+
try {
|
|
423
|
+
ws = await this.stt.connectWs(this.connOptions.timeoutMs);
|
|
424
|
+
|
|
425
|
+
// Wrap tasks for proper cancellation support using Task signals
|
|
426
|
+
const controller = new AbortController();
|
|
427
|
+
const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);
|
|
428
|
+
const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);
|
|
429
|
+
const recvTask = Task.from(({ signal }) => recv(signal), controller);
|
|
430
|
+
const waitReconnectTask = Task.from(
|
|
431
|
+
({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),
|
|
432
|
+
controller,
|
|
433
|
+
);
|
|
376
434
|
|
|
377
435
|
try {
|
|
378
436
|
await Promise.race([
|
|
379
|
-
Promise.all(
|
|
437
|
+
Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),
|
|
380
438
|
waitReconnectTask.result,
|
|
381
439
|
]);
|
|
382
440
|
|
|
441
|
+
// If reconnect didn't trigger, tasks finished - exit loop
|
|
383
442
|
if (!waitReconnectTask.done) break;
|
|
443
|
+
|
|
444
|
+
// Reconnect triggered - clear event and continue loop
|
|
384
445
|
this.reconnectEvent.clear();
|
|
385
446
|
} finally {
|
|
386
|
-
|
|
447
|
+
// Cancel all tasks to ensure cleanup
|
|
448
|
+
await cancelAndWait(
|
|
449
|
+
[sendTask, wsListenerTask, recvTask, waitReconnectTask],
|
|
450
|
+
DEFAULT_CANCEL_TIMEOUT,
|
|
451
|
+
);
|
|
452
|
+
resourceCleanup();
|
|
387
453
|
}
|
|
388
454
|
} finally {
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
} catch {}
|
|
455
|
+
// Ensure cleanup even if connectWs throws
|
|
456
|
+
resourceCleanup();
|
|
392
457
|
}
|
|
393
458
|
}
|
|
394
459
|
}
|
|
395
460
|
|
|
396
461
|
private processTranscript(data: Record<string, any>, isFinal: boolean) {
|
|
462
|
+
// Check if queue is closed to avoid race condition during disconnect
|
|
463
|
+
if (this.queue.closed) return;
|
|
464
|
+
|
|
397
465
|
const requestId = data.request_id ?? this.requestId;
|
|
398
466
|
const text = data.transcript ?? '';
|
|
399
467
|
const language = data.language ?? this.opts.language ?? 'en';
|
|
400
468
|
|
|
401
469
|
if (!text && !isFinal) return;
|
|
402
470
|
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
this.speaking
|
|
406
|
-
|
|
407
|
-
|
|
471
|
+
try {
|
|
472
|
+
// We'll have a more accurate way of detecting when speech started when we have VAD
|
|
473
|
+
if (!this.speaking) {
|
|
474
|
+
this.speaking = true;
|
|
475
|
+
this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
|
|
476
|
+
}
|
|
408
477
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
478
|
+
const speechData: SpeechData = {
|
|
479
|
+
language,
|
|
480
|
+
startTime: data.start ?? 0,
|
|
481
|
+
endTime: data.duration ?? 0,
|
|
482
|
+
confidence: data.confidence ?? 1.0,
|
|
483
|
+
text,
|
|
484
|
+
};
|
|
485
|
+
|
|
486
|
+
if (isFinal) {
|
|
487
|
+
if (this.speechDuration > 0) {
|
|
488
|
+
this.queue.put({
|
|
489
|
+
type: SpeechEventType.RECOGNITION_USAGE,
|
|
490
|
+
requestId,
|
|
491
|
+
recognitionUsage: { audioDuration: this.speechDuration },
|
|
492
|
+
});
|
|
493
|
+
this.speechDuration = 0;
|
|
494
|
+
}
|
|
416
495
|
|
|
417
|
-
if (isFinal) {
|
|
418
|
-
if (this.speechDuration > 0) {
|
|
419
496
|
this.queue.put({
|
|
420
|
-
type: SpeechEventType.
|
|
497
|
+
type: SpeechEventType.FINAL_TRANSCRIPT,
|
|
421
498
|
requestId,
|
|
422
|
-
|
|
499
|
+
alternatives: [speechData],
|
|
423
500
|
});
|
|
424
|
-
this.speechDuration = 0;
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
this.queue.put({
|
|
428
|
-
type: SpeechEventType.FINAL_TRANSCRIPT,
|
|
429
|
-
requestId,
|
|
430
|
-
alternatives: [speechData],
|
|
431
|
-
});
|
|
432
501
|
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
502
|
+
if (this.speaking) {
|
|
503
|
+
this.speaking = false;
|
|
504
|
+
this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
|
|
505
|
+
}
|
|
506
|
+
} else {
|
|
507
|
+
this.queue.put({
|
|
508
|
+
type: SpeechEventType.INTERIM_TRANSCRIPT,
|
|
509
|
+
requestId,
|
|
510
|
+
alternatives: [speechData],
|
|
511
|
+
});
|
|
512
|
+
}
|
|
513
|
+
} catch (e) {
|
|
514
|
+
if (e instanceof Error && e.message.includes('Queue is closed')) {
|
|
515
|
+
// Expected behavior on disconnect, log as warning
|
|
516
|
+
this.#logger.warn(
|
|
517
|
+
{ err: e },
|
|
518
|
+
'Queue closed during transcript processing (expected during disconnect)',
|
|
519
|
+
);
|
|
520
|
+
} else {
|
|
521
|
+
this.#logger.error({ err: e }, 'Error putting transcript to queue');
|
|
436
522
|
}
|
|
437
|
-
} else {
|
|
438
|
-
this.queue.put({
|
|
439
|
-
type: SpeechEventType.INTERIM_TRANSCRIPT,
|
|
440
|
-
requestId,
|
|
441
|
-
alternatives: [speechData],
|
|
442
|
-
});
|
|
443
523
|
}
|
|
444
524
|
}
|
|
445
525
|
}
|
package/src/inference/tts.ts
CHANGED
|
@@ -277,6 +277,9 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
277
277
|
};
|
|
278
278
|
|
|
279
279
|
const sendClientEvent = async (event: TtsClientEvent) => {
|
|
280
|
+
// Don't send events to a closed WebSocket or aborted controller
|
|
281
|
+
if (this.abortController.signal.aborted || closing) return;
|
|
282
|
+
|
|
280
283
|
const validatedEvent = await ttsClientEventSchema.parseAsync(event);
|
|
281
284
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
282
285
|
this.#logger.warn('Trying to send client TTS event to a closed WebSocket');
|
|
@@ -321,7 +324,7 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
321
324
|
return new Promise<void>((resolve, reject) => {
|
|
322
325
|
this.abortController.signal.addEventListener('abort', () => {
|
|
323
326
|
resourceCleanup();
|
|
324
|
-
|
|
327
|
+
resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error
|
|
325
328
|
});
|
|
326
329
|
|
|
327
330
|
ws.on('message', async (data) => {
|
|
@@ -420,8 +423,6 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
420
423
|
createWsListenerTask(ws),
|
|
421
424
|
createRecvTask(),
|
|
422
425
|
]);
|
|
423
|
-
} catch (e) {
|
|
424
|
-
this.#logger.error({ error: e }, 'Error in SynthesizeStream');
|
|
425
426
|
} finally {
|
|
426
427
|
resourceCleanup();
|
|
427
428
|
}
|
package/src/stt/stt.ts
CHANGED
|
@@ -257,7 +257,18 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
|
|
|
257
257
|
|
|
258
258
|
protected async monitorMetrics() {
|
|
259
259
|
for await (const event of this.queue) {
|
|
260
|
-
this.output.
|
|
260
|
+
if (!this.output.closed) {
|
|
261
|
+
try {
|
|
262
|
+
this.output.put(event);
|
|
263
|
+
} catch (e) {
|
|
264
|
+
if (e instanceof Error && e.message.includes('Queue is closed')) {
|
|
265
|
+
this.logger.warn(
|
|
266
|
+
{ err: e },
|
|
267
|
+
'Queue closed during transcript processing (expected during disconnect)',
|
|
268
|
+
);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
}
|
|
261
272
|
if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;
|
|
262
273
|
const metrics: STTMetrics = {
|
|
263
274
|
type: 'stt_metrics',
|
|
@@ -270,7 +281,9 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
|
|
|
270
281
|
};
|
|
271
282
|
this.#stt.emit('metrics_collected', metrics);
|
|
272
283
|
}
|
|
273
|
-
this.output.
|
|
284
|
+
if (!this.output.closed) {
|
|
285
|
+
this.output.close();
|
|
286
|
+
}
|
|
274
287
|
}
|
|
275
288
|
|
|
276
289
|
protected abstract run(): Promise<void>;
|
|
@@ -336,9 +349,9 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
|
|
|
336
349
|
|
|
337
350
|
/** Close both the input and output of the STT stream */
|
|
338
351
|
close() {
|
|
339
|
-
this.input.close();
|
|
340
|
-
this.queue.close();
|
|
341
|
-
this.output.close();
|
|
352
|
+
if (!this.input.closed) this.input.close();
|
|
353
|
+
if (!this.queue.closed) this.queue.close();
|
|
354
|
+
if (!this.output.closed) this.output.close();
|
|
342
355
|
this.closed = true;
|
|
343
356
|
}
|
|
344
357
|
|
package/src/tts/tts.ts
CHANGED
|
@@ -209,7 +209,16 @@ export abstract class SynthesizeStream
|
|
|
209
209
|
});
|
|
210
210
|
}
|
|
211
211
|
|
|
212
|
-
//
|
|
212
|
+
// NOTE(AJS-37): The implementation below uses an AsyncIterableQueue (`this.input`)
|
|
213
|
+
// bridged from a DeferredReadableStream (`this.deferredInputStream`) rather than
|
|
214
|
+
// consuming the stream directly.
|
|
215
|
+
//
|
|
216
|
+
// A full refactor to native Web Streams was considered but is currently deferred.
|
|
217
|
+
// The primary reason is to maintain architectural parity with the Python SDK,
|
|
218
|
+
// which is a key design goal for the project. This ensures a consistent developer
|
|
219
|
+
// experience across both platforms.
|
|
220
|
+
//
|
|
221
|
+
// For more context, see the discussion in GitHub issue # 844.
|
|
213
222
|
protected async pumpInput() {
|
|
214
223
|
const reader = this.deferredInputStream.stream.getReader();
|
|
215
224
|
try {
|
|
@@ -298,12 +307,11 @@ export abstract class SynthesizeStream
|
|
|
298
307
|
}
|
|
299
308
|
this.#metricsText += text;
|
|
300
309
|
|
|
301
|
-
if (this.input.closed) {
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
if (this.closed) {
|
|
305
|
-
throw new Error('Stream is closed');
|
|
310
|
+
if (this.input.closed || this.closed) {
|
|
311
|
+
// Stream was aborted/closed, silently skip
|
|
312
|
+
return;
|
|
306
313
|
}
|
|
314
|
+
|
|
307
315
|
this.input.put(text);
|
|
308
316
|
}
|
|
309
317
|
|
|
@@ -313,24 +321,24 @@ export abstract class SynthesizeStream
|
|
|
313
321
|
this.#metricsPendingTexts.push(this.#metricsText);
|
|
314
322
|
this.#metricsText = '';
|
|
315
323
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
throw new Error('Stream is closed');
|
|
324
|
+
|
|
325
|
+
if (this.input.closed || this.closed) {
|
|
326
|
+
// Stream was aborted/closed, silently skip
|
|
327
|
+
return;
|
|
321
328
|
}
|
|
329
|
+
|
|
322
330
|
this.input.put(SynthesizeStream.FLUSH_SENTINEL);
|
|
323
331
|
}
|
|
324
332
|
|
|
325
333
|
/** Mark the input as ended and forbid additional pushes */
|
|
326
334
|
endInput() {
|
|
327
335
|
this.flush();
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
throw new Error('Stream is closed');
|
|
336
|
+
|
|
337
|
+
if (this.input.closed || this.closed) {
|
|
338
|
+
// Stream was aborted/closed, silently skip
|
|
339
|
+
return;
|
|
333
340
|
}
|
|
341
|
+
|
|
334
342
|
this.input.close();
|
|
335
343
|
}
|
|
336
344
|
|
package/src/utils.test.ts
CHANGED
|
@@ -5,15 +5,7 @@ import { AudioFrame } from '@livekit/rtc-node';
|
|
|
5
5
|
import { ReadableStream } from 'node:stream/web';
|
|
6
6
|
import { describe, expect, it } from 'vitest';
|
|
7
7
|
import { initializeLogger } from '../src/log.js';
|
|
8
|
-
import {
|
|
9
|
-
Event,
|
|
10
|
-
TASK_TIMEOUT_ERROR,
|
|
11
|
-
Task,
|
|
12
|
-
TaskResult,
|
|
13
|
-
delay,
|
|
14
|
-
isPending,
|
|
15
|
-
resampleStream,
|
|
16
|
-
} from '../src/utils.js';
|
|
8
|
+
import { Event, Task, TaskResult, delay, isPending, resampleStream } from '../src/utils.js';
|
|
17
9
|
|
|
18
10
|
describe('utils', () => {
|
|
19
11
|
// initialize logger
|
|
@@ -442,7 +434,8 @@ describe('utils', () => {
|
|
|
442
434
|
await task.cancelAndWait(200);
|
|
443
435
|
expect.fail('Task should have timed out');
|
|
444
436
|
} catch (error: unknown) {
|
|
445
|
-
expect(error).
|
|
437
|
+
expect(error).instanceof(Error);
|
|
438
|
+
expect((error as Error).message).toBe('Task cancellation timed out');
|
|
446
439
|
}
|
|
447
440
|
});
|
|
448
441
|
|