@livekit/agents 1.0.27 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/connection_pool.cjs +242 -0
- package/dist/connection_pool.cjs.map +1 -0
- package/dist/connection_pool.d.cts +123 -0
- package/dist/connection_pool.d.ts +123 -0
- package/dist/connection_pool.d.ts.map +1 -0
- package/dist/connection_pool.js +218 -0
- package/dist/connection_pool.js.map +1 -0
- package/dist/connection_pool.test.cjs +256 -0
- package/dist/connection_pool.test.cjs.map +1 -0
- package/dist/connection_pool.test.js +255 -0
- package/dist/connection_pool.test.js.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/tts.cjs +172 -56
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +3 -0
- package/dist/inference/tts.d.ts +3 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +173 -57
- package/dist/inference/tts.js.map +1 -1
- package/dist/utils.cjs +20 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +19 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +3 -1
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +3 -1
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +4 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +4 -1
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +1 -1
- package/dist/voice/avatar/datastream_io.cjs.map +1 -1
- package/dist/voice/avatar/datastream_io.js +1 -1
- package/dist/voice/avatar/datastream_io.js.map +1 -1
- package/dist/voice/background_audio.cjs +77 -37
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/background_audio.d.cts +10 -3
- package/dist/voice/background_audio.d.ts +10 -3
- package/dist/voice/background_audio.d.ts.map +1 -1
- package/dist/voice/background_audio.js +78 -37
- package/dist/voice/background_audio.js.map +1 -1
- package/dist/voice/index.cjs +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs +10 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +18 -1
- package/dist/voice/io.d.ts +18 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +10 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +1 -1
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +1 -1
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.js +1 -1
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/voice/transcription/synchronizer.cjs +1 -1
- package/dist/voice/transcription/synchronizer.cjs.map +1 -1
- package/dist/voice/transcription/synchronizer.js +1 -1
- package/dist/voice/transcription/synchronizer.js.map +1 -1
- package/dist/worker.cjs +4 -6
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -6
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
- package/src/connection_pool.test.ts +346 -0
- package/src/connection_pool.ts +307 -0
- package/src/index.ts +1 -0
- package/src/inference/tts.ts +206 -63
- package/src/utils.ts +25 -0
- package/src/voice/agent_activity.ts +7 -1
- package/src/voice/agent_session.ts +4 -1
- package/src/voice/avatar/datastream_io.ts +1 -1
- package/src/voice/background_audio.ts +95 -55
- package/src/voice/index.ts +1 -0
- package/src/voice/io.ts +24 -0
- package/src/voice/recorder_io/recorder_io.ts +1 -1
- package/src/voice/room_io/_output.ts +1 -1
- package/src/voice/transcription/synchronizer.ts +1 -1
- package/src/worker.ts +4 -7
package/src/inference/tts.ts
CHANGED
|
@@ -5,13 +5,14 @@ import type { AudioFrame } from '@livekit/rtc-node';
|
|
|
5
5
|
import { WebSocket } from 'ws';
|
|
6
6
|
import { APIError, APIStatusError } from '../_exceptions.js';
|
|
7
7
|
import { AudioByteStream } from '../audio.js';
|
|
8
|
+
import { ConnectionPool } from '../connection_pool.js';
|
|
8
9
|
import { log } from '../log.js';
|
|
9
10
|
import { createStreamChannel } from '../stream/stream_channel.js';
|
|
10
11
|
import { basic as tokenizeBasic } from '../tokenize/index.js';
|
|
11
12
|
import type { ChunkedStream } from '../tts/index.js';
|
|
12
13
|
import { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';
|
|
13
14
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
14
|
-
import { shortuuid } from '../utils.js';
|
|
15
|
+
import { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from '../utils.js';
|
|
15
16
|
import {
|
|
16
17
|
type TtsClientEvent,
|
|
17
18
|
type TtsServerEvent,
|
|
@@ -95,6 +96,7 @@ export interface InferenceTTSOptions<TModel extends TTSModels> {
|
|
|
95
96
|
export class TTS<TModel extends TTSModels> extends BaseTTS {
|
|
96
97
|
private opts: InferenceTTSOptions<TModel>;
|
|
97
98
|
private streams: Set<SynthesizeStream<TModel>> = new Set();
|
|
99
|
+
pool: ConnectionPool<WebSocket>;
|
|
98
100
|
|
|
99
101
|
#logger = log();
|
|
100
102
|
|
|
@@ -165,6 +167,15 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
|
|
|
165
167
|
apiSecret: lkApiSecret,
|
|
166
168
|
modelOptions,
|
|
167
169
|
};
|
|
170
|
+
|
|
171
|
+
// Initialize connection pool
|
|
172
|
+
this.pool = new ConnectionPool<WebSocket>({
|
|
173
|
+
connectCb: (timeout) => this.connectWs(timeout),
|
|
174
|
+
closeCb: (ws) => this.closeWs(ws),
|
|
175
|
+
maxSessionDuration: 300_000,
|
|
176
|
+
markRefreshedOnGet: true,
|
|
177
|
+
connectTimeout: 10_000, // 10 seconds default
|
|
178
|
+
});
|
|
168
179
|
}
|
|
169
180
|
|
|
170
181
|
get label() {
|
|
@@ -218,6 +229,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
|
|
|
218
229
|
if (this.opts.model) params.model = this.opts.model;
|
|
219
230
|
if (this.opts.language) params.language = this.opts.language;
|
|
220
231
|
|
|
232
|
+
this.#logger.debug({ url }, 'inference.TTS creating new websocket connection (pool miss)');
|
|
221
233
|
const socket = await connectWs(url, headers, timeout);
|
|
222
234
|
socket.send(JSON.stringify(params));
|
|
223
235
|
return socket;
|
|
@@ -227,11 +239,16 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
|
|
|
227
239
|
await ws.close();
|
|
228
240
|
}
|
|
229
241
|
|
|
242
|
+
prewarm(): void {
|
|
243
|
+
this.pool.prewarm();
|
|
244
|
+
}
|
|
245
|
+
|
|
230
246
|
async close() {
|
|
231
247
|
for (const stream of this.streams) {
|
|
232
248
|
await stream.close();
|
|
233
249
|
}
|
|
234
250
|
this.streams.clear();
|
|
251
|
+
await this.pool.close();
|
|
235
252
|
}
|
|
236
253
|
}
|
|
237
254
|
|
|
@@ -256,30 +273,31 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
256
273
|
}
|
|
257
274
|
|
|
258
275
|
protected async run(): Promise<void> {
|
|
259
|
-
let ws: WebSocket | null = null;
|
|
260
276
|
let closing = false;
|
|
261
|
-
let finalReceived = false;
|
|
262
277
|
let lastFrame: AudioFrame | undefined;
|
|
263
278
|
|
|
264
279
|
const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();
|
|
265
280
|
const eventChannel = createStreamChannel<TtsServerEvent>();
|
|
266
281
|
const requestId = shortuuid('tts_request_');
|
|
282
|
+
const inputSentEvent = new Event();
|
|
283
|
+
|
|
284
|
+
// Signal for protocol-driven completion (when 'done' message is received)
|
|
285
|
+
const completionFuture = new Future<void>();
|
|
267
286
|
|
|
268
|
-
const resourceCleanup = () => {
|
|
287
|
+
const resourceCleanup = async () => {
|
|
269
288
|
if (closing) return;
|
|
270
289
|
closing = true;
|
|
271
290
|
sendTokenizerStream.close();
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
ws?.close();
|
|
291
|
+
// close() returns a promise; don't leak it
|
|
292
|
+
await eventChannel.close();
|
|
275
293
|
};
|
|
276
294
|
|
|
277
|
-
const sendClientEvent = async (event: TtsClientEvent) => {
|
|
295
|
+
const sendClientEvent = async (event: TtsClientEvent, ws: WebSocket, signal: AbortSignal) => {
|
|
278
296
|
// Don't send events to a closed WebSocket or aborted controller
|
|
279
|
-
if (
|
|
297
|
+
if (signal.aborted || closing) return;
|
|
280
298
|
|
|
281
299
|
const validatedEvent = await ttsClientEventSchema.parseAsync(event);
|
|
282
|
-
if (
|
|
300
|
+
if (ws.readyState !== WebSocket.OPEN) {
|
|
283
301
|
this.#logger.warn('Trying to send client TTS event to a closed WebSocket');
|
|
284
302
|
return;
|
|
285
303
|
}
|
|
@@ -293,9 +311,9 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
293
311
|
}
|
|
294
312
|
};
|
|
295
313
|
|
|
296
|
-
const createInputTask = async () => {
|
|
314
|
+
const createInputTask = async (signal: AbortSignal) => {
|
|
297
315
|
for await (const data of this.input) {
|
|
298
|
-
if (
|
|
316
|
+
if (signal.aborted || closing) break;
|
|
299
317
|
if (data === SynthesizeStream.FLUSH_SENTINEL) {
|
|
300
318
|
sendTokenizerStream.flush();
|
|
301
319
|
continue;
|
|
@@ -308,55 +326,108 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
308
326
|
}
|
|
309
327
|
};
|
|
310
328
|
|
|
311
|
-
const createSentenceStreamTask = async () => {
|
|
329
|
+
const createSentenceStreamTask = async (ws: WebSocket, signal: AbortSignal) => {
|
|
312
330
|
for await (const ev of sendTokenizerStream) {
|
|
313
|
-
if (
|
|
314
|
-
|
|
315
|
-
sendClientEvent(
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
331
|
+
if (signal.aborted || closing) break;
|
|
332
|
+
|
|
333
|
+
await sendClientEvent(
|
|
334
|
+
{
|
|
335
|
+
type: 'input_transcript',
|
|
336
|
+
transcript: ev.token + ' ',
|
|
337
|
+
},
|
|
338
|
+
ws,
|
|
339
|
+
signal,
|
|
340
|
+
);
|
|
341
|
+
inputSentEvent.set();
|
|
319
342
|
}
|
|
320
343
|
|
|
321
|
-
sendClientEvent({ type: 'session.flush' });
|
|
344
|
+
await sendClientEvent({ type: 'session.flush' }, ws, signal);
|
|
345
|
+
// needed in case empty input is sent
|
|
346
|
+
inputSentEvent.set();
|
|
322
347
|
};
|
|
323
348
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
});
|
|
330
|
-
|
|
331
|
-
ws.on('message', async (data) => {
|
|
349
|
+
// Handles WebSocket message routing and error handling
|
|
350
|
+
// Completes based on protocol messages, NOT on ws.close()
|
|
351
|
+
const createWsListenerTask = async (ws: WebSocket, signal: AbortSignal) => {
|
|
352
|
+
const onMessage = (data: Buffer) => {
|
|
353
|
+
try {
|
|
332
354
|
const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;
|
|
333
355
|
const validatedEvent = ttsServerEventSchema.parse(eventJson);
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
})
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
if (!closing) return this.#logger.error('WebSocket closed unexpectedly');
|
|
347
|
-
if (finalReceived) return resolve();
|
|
356
|
+
// writer.write returns a promise; avoid unhandled rejections if stream is closed
|
|
357
|
+
void eventChannel.write(validatedEvent).catch((error) => {
|
|
358
|
+
this.#logger.debug(
|
|
359
|
+
{ error },
|
|
360
|
+
'Failed writing TTS event to stream channel (likely closed)',
|
|
361
|
+
);
|
|
362
|
+
});
|
|
363
|
+
} catch (e) {
|
|
364
|
+
this.#logger.error({ error: e }, 'Error parsing WebSocket message');
|
|
365
|
+
}
|
|
366
|
+
};
|
|
348
367
|
|
|
349
|
-
|
|
368
|
+
const onError = (e: Error) => {
|
|
369
|
+
this.#logger.error({ error: e }, 'WebSocket error');
|
|
370
|
+
void resourceCleanup();
|
|
371
|
+
try {
|
|
372
|
+
// If the ws is misbehaving, hard-stop it immediately to avoid buffering.
|
|
373
|
+
ws.terminate?.();
|
|
374
|
+
} catch {
|
|
375
|
+
// ignore
|
|
376
|
+
}
|
|
377
|
+
// Ensure this ws is not reused
|
|
378
|
+
this.tts.pool.remove(ws);
|
|
379
|
+
completionFuture.reject(e);
|
|
380
|
+
};
|
|
381
|
+
|
|
382
|
+
const onClose = () => {
|
|
383
|
+
// WebSocket closed unexpectedly (not by us)
|
|
384
|
+
if (!closing) {
|
|
385
|
+
this.#logger.error('WebSocket closed unexpectedly');
|
|
386
|
+
void resourceCleanup();
|
|
387
|
+
// Ensure this ws is not reused
|
|
388
|
+
this.tts.pool.remove(ws);
|
|
389
|
+
completionFuture.reject(
|
|
350
390
|
new APIStatusError({
|
|
351
391
|
message: 'Gateway connection closed unexpectedly',
|
|
352
392
|
options: { requestId },
|
|
353
393
|
}),
|
|
354
394
|
);
|
|
355
|
-
}
|
|
356
|
-
}
|
|
395
|
+
}
|
|
396
|
+
};
|
|
397
|
+
|
|
398
|
+
const onAbort = () => {
|
|
399
|
+
void resourceCleanup();
|
|
400
|
+
try {
|
|
401
|
+
// On interruption/abort, close the websocket immediately so the server stops streaming
|
|
402
|
+
// and the ws library doesn't buffer unread frames in memory.
|
|
403
|
+
ws.terminate?.();
|
|
404
|
+
} catch {
|
|
405
|
+
// ignore
|
|
406
|
+
}
|
|
407
|
+
this.tts.pool.remove(ws);
|
|
408
|
+
inputSentEvent.set();
|
|
409
|
+
completionFuture.resolve();
|
|
410
|
+
};
|
|
411
|
+
|
|
412
|
+
// Attach listeners
|
|
413
|
+
ws.on('message', onMessage);
|
|
414
|
+
ws.on('error', onError);
|
|
415
|
+
ws.on('close', onClose);
|
|
416
|
+
signal.addEventListener('abort', onAbort);
|
|
417
|
+
|
|
418
|
+
try {
|
|
419
|
+
// Wait for protocol-driven completion or error
|
|
420
|
+
await completionFuture.await;
|
|
421
|
+
} finally {
|
|
422
|
+
// IMPORTANT: Remove listeners so connection can be reused
|
|
423
|
+
ws.off('message', onMessage);
|
|
424
|
+
ws.off('error', onError);
|
|
425
|
+
ws.off('close', onClose);
|
|
426
|
+
signal.removeEventListener('abort', onAbort);
|
|
427
|
+
}
|
|
357
428
|
};
|
|
358
429
|
|
|
359
|
-
const createRecvTask = async () => {
|
|
430
|
+
const createRecvTask = async (signal: AbortSignal) => {
|
|
360
431
|
let currentSessionId: string | null = null;
|
|
361
432
|
|
|
362
433
|
const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
|
|
@@ -364,9 +435,11 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
364
435
|
const reader = serverEventStream.getReader();
|
|
365
436
|
|
|
366
437
|
try {
|
|
367
|
-
|
|
438
|
+
await inputSentEvent.wait();
|
|
439
|
+
|
|
440
|
+
while (!this.closed && !signal.aborted) {
|
|
368
441
|
const result = await reader.read();
|
|
369
|
-
if (
|
|
442
|
+
if (signal.aborted) return;
|
|
370
443
|
if (result.done) return;
|
|
371
444
|
|
|
372
445
|
const serverEvent = result.value;
|
|
@@ -382,24 +455,29 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
382
455
|
}
|
|
383
456
|
break;
|
|
384
457
|
case 'done':
|
|
385
|
-
finalReceived = true;
|
|
386
458
|
for (const frame of bstream.flush()) {
|
|
387
459
|
sendLastFrame(currentSessionId!, false);
|
|
388
460
|
lastFrame = frame;
|
|
389
461
|
}
|
|
390
462
|
sendLastFrame(currentSessionId!, true);
|
|
391
463
|
this.queue.put(SynthesizeStream.END_OF_STREAM);
|
|
392
|
-
|
|
464
|
+
await resourceCleanup();
|
|
465
|
+
completionFuture.resolve();
|
|
466
|
+
return;
|
|
393
467
|
case 'session.closed':
|
|
394
|
-
resourceCleanup();
|
|
395
|
-
|
|
468
|
+
await resourceCleanup();
|
|
469
|
+
completionFuture.resolve();
|
|
470
|
+
return;
|
|
396
471
|
case 'error':
|
|
397
472
|
this.#logger.error(
|
|
398
473
|
{ serverEvent },
|
|
399
474
|
'Received error message from LiveKit TTS WebSocket',
|
|
400
475
|
);
|
|
401
|
-
resourceCleanup();
|
|
402
|
-
|
|
476
|
+
await resourceCleanup();
|
|
477
|
+
completionFuture.reject(
|
|
478
|
+
new APIError(`LiveKit TTS returned error: ${serverEvent.message}`),
|
|
479
|
+
);
|
|
480
|
+
return;
|
|
403
481
|
default:
|
|
404
482
|
this.#logger.warn('Unexpected message %s', serverEvent);
|
|
405
483
|
break;
|
|
@@ -416,16 +494,81 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
|
|
|
416
494
|
};
|
|
417
495
|
|
|
418
496
|
try {
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
497
|
+
await this.tts.pool.withConnection(
|
|
498
|
+
async (ws: WebSocket) => {
|
|
499
|
+
try {
|
|
500
|
+
// IMPORTANT: don't cancel the stream's controller on normal completion,
|
|
501
|
+
// otherwise the pool will remove+close the ws and every run becomes a pool miss.
|
|
502
|
+
const runController = new AbortController();
|
|
503
|
+
const onStreamAbort = () => runController.abort(this.abortController.signal.reason);
|
|
504
|
+
this.abortController.signal.addEventListener('abort', onStreamAbort, { once: true });
|
|
505
|
+
|
|
506
|
+
const tasks = [
|
|
507
|
+
Task.from(
|
|
508
|
+
async (controller) => {
|
|
509
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
510
|
+
await createInputTask(combined);
|
|
511
|
+
},
|
|
512
|
+
undefined,
|
|
513
|
+
'inference-tts-input',
|
|
514
|
+
),
|
|
515
|
+
Task.from(
|
|
516
|
+
async (controller) => {
|
|
517
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
518
|
+
await createSentenceStreamTask(ws, combined);
|
|
519
|
+
},
|
|
520
|
+
undefined,
|
|
521
|
+
'inference-tts-sentence',
|
|
522
|
+
),
|
|
523
|
+
Task.from(
|
|
524
|
+
async (controller) => {
|
|
525
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
526
|
+
await createWsListenerTask(ws, combined);
|
|
527
|
+
},
|
|
528
|
+
undefined,
|
|
529
|
+
'inference-tts-ws-listener',
|
|
530
|
+
),
|
|
531
|
+
Task.from(
|
|
532
|
+
async (controller) => {
|
|
533
|
+
const combined = combineSignals(runController.signal, controller.signal);
|
|
534
|
+
await createRecvTask(combined);
|
|
535
|
+
},
|
|
536
|
+
undefined,
|
|
537
|
+
'inference-tts-recv',
|
|
538
|
+
),
|
|
539
|
+
];
|
|
540
|
+
|
|
541
|
+
try {
|
|
542
|
+
await Promise.all(tasks.map((t) => t.result));
|
|
543
|
+
} finally {
|
|
544
|
+
// Mirror python finally: unblock recv and cancel all tasks.
|
|
545
|
+
inputSentEvent.set();
|
|
546
|
+
await resourceCleanup();
|
|
547
|
+
await cancelAndWait(tasks, 5000);
|
|
548
|
+
this.abortController.signal.removeEventListener('abort', onStreamAbort);
|
|
549
|
+
}
|
|
550
|
+
} catch (e) {
|
|
551
|
+
// If aborted, don't throw - let cleanup handle it
|
|
552
|
+
if (e instanceof Error && e.name === 'AbortError') {
|
|
553
|
+
return;
|
|
554
|
+
}
|
|
555
|
+
throw e;
|
|
556
|
+
}
|
|
557
|
+
},
|
|
558
|
+
{
|
|
559
|
+
timeout: this.connOptions.timeoutMs,
|
|
560
|
+
},
|
|
561
|
+
);
|
|
562
|
+
} catch (e) {
|
|
563
|
+
// Handle connection errors
|
|
564
|
+
if (e instanceof Error && e.name === 'AbortError') {
|
|
565
|
+
// Abort is expected during normal shutdown
|
|
566
|
+
return;
|
|
567
|
+
}
|
|
568
|
+
throw e;
|
|
427
569
|
} finally {
|
|
428
|
-
|
|
570
|
+
// Ensure cleanup always runs (and don't leak the promise)
|
|
571
|
+
await resourceCleanup();
|
|
429
572
|
}
|
|
430
573
|
}
|
|
431
574
|
}
|
package/src/utils.ts
CHANGED
|
@@ -840,6 +840,31 @@ export async function waitForAbort(signal: AbortSignal) {
|
|
|
840
840
|
return await abortFuture.await;
|
|
841
841
|
}
|
|
842
842
|
|
|
843
|
+
/**
|
|
844
|
+
* Combines two abort signals into a single abort signal.
|
|
845
|
+
* @param a - The first abort signal.
|
|
846
|
+
* @param b - The second abort signal.
|
|
847
|
+
* @returns A new abort signal that is aborted when either of the input signals is aborted.
|
|
848
|
+
*/
|
|
849
|
+
export const combineSignals = (a: AbortSignal, b: AbortSignal): AbortSignal => {
|
|
850
|
+
const c = new AbortController();
|
|
851
|
+
const abortFrom = (s: AbortSignal) => {
|
|
852
|
+
if (c.signal.aborted) return;
|
|
853
|
+
c.abort((s as any).reason);
|
|
854
|
+
};
|
|
855
|
+
if (a.aborted) {
|
|
856
|
+
abortFrom(a);
|
|
857
|
+
} else {
|
|
858
|
+
a.addEventListener('abort', () => abortFrom(a), { once: true });
|
|
859
|
+
}
|
|
860
|
+
if (b.aborted) {
|
|
861
|
+
abortFrom(b);
|
|
862
|
+
} else {
|
|
863
|
+
b.addEventListener('abort', () => abortFrom(b), { once: true });
|
|
864
|
+
}
|
|
865
|
+
return c.signal;
|
|
866
|
+
};
|
|
867
|
+
|
|
843
868
|
export const isCloud = (url: URL) => {
|
|
844
869
|
const hostname = url.hostname;
|
|
845
870
|
return hostname.endsWith('.livekit.cloud') || hostname.endsWith('.livekit.run');
|
package/src/voice/agent_activity.ts
CHANGED
|
@@ -1449,6 +1449,13 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1449
1449
|
{ speech_id: speechHandle.id },
|
|
1450
1450
|
'Aborting all pipeline reply tasks due to interruption',
|
|
1451
1451
|
);
|
|
1452
|
+
|
|
1453
|
+
// Stop playout ASAP (don't wait for cancellations), otherwise the segment may finish and we
|
|
1454
|
+
// will correctly (but undesirably) commit a long transcript even though the user said "stop".
|
|
1455
|
+
if (audioOutput) {
|
|
1456
|
+
audioOutput.clearBuffer();
|
|
1457
|
+
}
|
|
1458
|
+
|
|
1452
1459
|
replyAbortController.abort();
|
|
1453
1460
|
await Promise.allSettled(
|
|
1454
1461
|
tasks.map((task) => task.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT)),
|
|
@@ -1457,7 +1464,6 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1457
1464
|
let forwardedText = textOut?.text || '';
|
|
1458
1465
|
|
|
1459
1466
|
if (audioOutput) {
|
|
1460
|
-
audioOutput.clearBuffer();
|
|
1461
1467
|
const playbackEv = await audioOutput.waitForPlayout();
|
|
1462
1468
|
if (audioOut?.firstFrameFut.done) {
|
|
1463
1469
|
// playback EV is valid only if the first frame was already played
|
package/src/voice/agent_session.ts
CHANGED
|
@@ -527,7 +527,10 @@ export class AgentSession<
|
|
|
527
527
|
newAgentId: agent.id,
|
|
528
528
|
}),
|
|
529
529
|
);
|
|
530
|
-
this.logger.debug(
|
|
530
|
+
this.logger.debug(
|
|
531
|
+
{ previousAgentId: previousActivity?.agent.id, newAgentId: agent.id },
|
|
532
|
+
'Agent handoff inserted into chat context',
|
|
533
|
+
);
|
|
531
534
|
|
|
532
535
|
await this.activity.start();
|
|
533
536
|
|
package/src/voice/avatar/datastream_io.ts
CHANGED
|
@@ -51,7 +51,7 @@ export class DataStreamAudioOutput extends AudioOutput {
|
|
|
51
51
|
#logger = log();
|
|
52
52
|
|
|
53
53
|
constructor(opts: DataStreamAudioOutputOptions) {
|
|
54
|
-
super(opts.sampleRate, undefined);
|
|
54
|
+
super(opts.sampleRate, undefined, { pause: false });
|
|
55
55
|
|
|
56
56
|
const { room, destinationIdentity, sampleRate, waitRemoteTrack } = opts;
|
|
57
57
|
this.room = room;
|