@livekit/agents 1.0.27 → 1.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. package/dist/connection_pool.cjs +242 -0
  2. package/dist/connection_pool.cjs.map +1 -0
  3. package/dist/connection_pool.d.cts +123 -0
  4. package/dist/connection_pool.d.ts +123 -0
  5. package/dist/connection_pool.d.ts.map +1 -0
  6. package/dist/connection_pool.js +218 -0
  7. package/dist/connection_pool.js.map +1 -0
  8. package/dist/connection_pool.test.cjs +256 -0
  9. package/dist/connection_pool.test.cjs.map +1 -0
  10. package/dist/connection_pool.test.js +255 -0
  11. package/dist/connection_pool.test.js.map +1 -0
  12. package/dist/index.cjs +2 -0
  13. package/dist/index.cjs.map +1 -1
  14. package/dist/index.d.cts +1 -0
  15. package/dist/index.d.ts +1 -0
  16. package/dist/index.d.ts.map +1 -1
  17. package/dist/index.js +1 -0
  18. package/dist/index.js.map +1 -1
  19. package/dist/inference/tts.cjs +172 -56
  20. package/dist/inference/tts.cjs.map +1 -1
  21. package/dist/inference/tts.d.cts +3 -0
  22. package/dist/inference/tts.d.ts +3 -0
  23. package/dist/inference/tts.d.ts.map +1 -1
  24. package/dist/inference/tts.js +173 -57
  25. package/dist/inference/tts.js.map +1 -1
  26. package/dist/utils.cjs +20 -0
  27. package/dist/utils.cjs.map +1 -1
  28. package/dist/utils.d.cts +7 -0
  29. package/dist/utils.d.ts +7 -0
  30. package/dist/utils.d.ts.map +1 -1
  31. package/dist/utils.js +19 -0
  32. package/dist/utils.js.map +1 -1
  33. package/dist/voice/agent_activity.cjs +3 -1
  34. package/dist/voice/agent_activity.cjs.map +1 -1
  35. package/dist/voice/agent_activity.d.ts.map +1 -1
  36. package/dist/voice/agent_activity.js +3 -1
  37. package/dist/voice/agent_activity.js.map +1 -1
  38. package/dist/voice/agent_session.cjs +4 -1
  39. package/dist/voice/agent_session.cjs.map +1 -1
  40. package/dist/voice/agent_session.d.ts.map +1 -1
  41. package/dist/voice/agent_session.js +4 -1
  42. package/dist/voice/agent_session.js.map +1 -1
  43. package/dist/voice/avatar/datastream_io.cjs +1 -1
  44. package/dist/voice/avatar/datastream_io.cjs.map +1 -1
  45. package/dist/voice/avatar/datastream_io.js +1 -1
  46. package/dist/voice/avatar/datastream_io.js.map +1 -1
  47. package/dist/voice/background_audio.cjs +77 -37
  48. package/dist/voice/background_audio.cjs.map +1 -1
  49. package/dist/voice/background_audio.d.cts +10 -3
  50. package/dist/voice/background_audio.d.ts +10 -3
  51. package/dist/voice/background_audio.d.ts.map +1 -1
  52. package/dist/voice/background_audio.js +78 -37
  53. package/dist/voice/background_audio.js.map +1 -1
  54. package/dist/voice/index.cjs +1 -0
  55. package/dist/voice/index.cjs.map +1 -1
  56. package/dist/voice/index.d.cts +1 -0
  57. package/dist/voice/index.d.ts +1 -0
  58. package/dist/voice/index.d.ts.map +1 -1
  59. package/dist/voice/index.js +1 -0
  60. package/dist/voice/index.js.map +1 -1
  61. package/dist/voice/io.cjs +10 -1
  62. package/dist/voice/io.cjs.map +1 -1
  63. package/dist/voice/io.d.cts +18 -1
  64. package/dist/voice/io.d.ts +18 -1
  65. package/dist/voice/io.d.ts.map +1 -1
  66. package/dist/voice/io.js +10 -1
  67. package/dist/voice/io.js.map +1 -1
  68. package/dist/voice/recorder_io/recorder_io.cjs +1 -1
  69. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  70. package/dist/voice/recorder_io/recorder_io.js +1 -1
  71. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  72. package/dist/voice/room_io/_output.cjs +1 -1
  73. package/dist/voice/room_io/_output.cjs.map +1 -1
  74. package/dist/voice/room_io/_output.js +1 -1
  75. package/dist/voice/room_io/_output.js.map +1 -1
  76. package/dist/voice/transcription/synchronizer.cjs +1 -1
  77. package/dist/voice/transcription/synchronizer.cjs.map +1 -1
  78. package/dist/voice/transcription/synchronizer.js +1 -1
  79. package/dist/voice/transcription/synchronizer.js.map +1 -1
  80. package/dist/worker.cjs +4 -6
  81. package/dist/worker.cjs.map +1 -1
  82. package/dist/worker.d.ts.map +1 -1
  83. package/dist/worker.js +4 -6
  84. package/dist/worker.js.map +1 -1
  85. package/package.json +3 -3
  86. package/src/connection_pool.test.ts +346 -0
  87. package/src/connection_pool.ts +307 -0
  88. package/src/index.ts +1 -0
  89. package/src/inference/tts.ts +206 -63
  90. package/src/utils.ts +25 -0
  91. package/src/voice/agent_activity.ts +7 -1
  92. package/src/voice/agent_session.ts +4 -1
  93. package/src/voice/avatar/datastream_io.ts +1 -1
  94. package/src/voice/background_audio.ts +95 -55
  95. package/src/voice/index.ts +1 -0
  96. package/src/voice/io.ts +24 -0
  97. package/src/voice/recorder_io/recorder_io.ts +1 -1
  98. package/src/voice/room_io/_output.ts +1 -1
  99. package/src/voice/transcription/synchronizer.ts +1 -1
  100. package/src/worker.ts +4 -7
@@ -5,13 +5,14 @@ import type { AudioFrame } from '@livekit/rtc-node';
5
5
  import { WebSocket } from 'ws';
6
6
  import { APIError, APIStatusError } from '../_exceptions.js';
7
7
  import { AudioByteStream } from '../audio.js';
8
+ import { ConnectionPool } from '../connection_pool.js';
8
9
  import { log } from '../log.js';
9
10
  import { createStreamChannel } from '../stream/stream_channel.js';
10
11
  import { basic as tokenizeBasic } from '../tokenize/index.js';
11
12
  import type { ChunkedStream } from '../tts/index.js';
12
13
  import { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';
13
14
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
14
- import { shortuuid } from '../utils.js';
15
+ import { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from '../utils.js';
15
16
  import {
16
17
  type TtsClientEvent,
17
18
  type TtsServerEvent,
@@ -95,6 +96,7 @@ export interface InferenceTTSOptions<TModel extends TTSModels> {
95
96
  export class TTS<TModel extends TTSModels> extends BaseTTS {
96
97
  private opts: InferenceTTSOptions<TModel>;
97
98
  private streams: Set<SynthesizeStream<TModel>> = new Set();
99
+ pool: ConnectionPool<WebSocket>;
98
100
 
99
101
  #logger = log();
100
102
 
@@ -165,6 +167,15 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
165
167
  apiSecret: lkApiSecret,
166
168
  modelOptions,
167
169
  };
170
+
171
+ // Initialize connection pool
172
+ this.pool = new ConnectionPool<WebSocket>({
173
+ connectCb: (timeout) => this.connectWs(timeout),
174
+ closeCb: (ws) => this.closeWs(ws),
175
+ maxSessionDuration: 300_000,
176
+ markRefreshedOnGet: true,
177
+ connectTimeout: 10_000, // 10 seconds default
178
+ });
168
179
  }
169
180
 
170
181
  get label() {
@@ -218,6 +229,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
218
229
  if (this.opts.model) params.model = this.opts.model;
219
230
  if (this.opts.language) params.language = this.opts.language;
220
231
 
232
+ this.#logger.debug({ url }, 'inference.TTS creating new websocket connection (pool miss)');
221
233
  const socket = await connectWs(url, headers, timeout);
222
234
  socket.send(JSON.stringify(params));
223
235
  return socket;
@@ -227,11 +239,16 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
227
239
  await ws.close();
228
240
  }
229
241
 
242
+ prewarm(): void {
243
+ this.pool.prewarm();
244
+ }
245
+
230
246
  async close() {
231
247
  for (const stream of this.streams) {
232
248
  await stream.close();
233
249
  }
234
250
  this.streams.clear();
251
+ await this.pool.close();
235
252
  }
236
253
  }
237
254
 
@@ -256,30 +273,31 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
256
273
  }
257
274
 
258
275
  protected async run(): Promise<void> {
259
- let ws: WebSocket | null = null;
260
276
  let closing = false;
261
- let finalReceived = false;
262
277
  let lastFrame: AudioFrame | undefined;
263
278
 
264
279
  const sendTokenizerStream = new tokenizeBasic.SentenceTokenizer().stream();
265
280
  const eventChannel = createStreamChannel<TtsServerEvent>();
266
281
  const requestId = shortuuid('tts_request_');
282
+ const inputSentEvent = new Event();
283
+
284
+ // Signal for protocol-driven completion (when 'done' message is received)
285
+ const completionFuture = new Future<void>();
267
286
 
268
- const resourceCleanup = () => {
287
+ const resourceCleanup = async () => {
269
288
  if (closing) return;
270
289
  closing = true;
271
290
  sendTokenizerStream.close();
272
- eventChannel.close();
273
- ws?.removeAllListeners();
274
- ws?.close();
291
+ // close() returns a promise; don't leak it
292
+ await eventChannel.close();
275
293
  };
276
294
 
277
- const sendClientEvent = async (event: TtsClientEvent) => {
295
+ const sendClientEvent = async (event: TtsClientEvent, ws: WebSocket, signal: AbortSignal) => {
278
296
  // Don't send events to a closed WebSocket or aborted controller
279
- if (this.abortController.signal.aborted || closing) return;
297
+ if (signal.aborted || closing) return;
280
298
 
281
299
  const validatedEvent = await ttsClientEventSchema.parseAsync(event);
282
- if (!ws || ws.readyState !== WebSocket.OPEN) {
300
+ if (ws.readyState !== WebSocket.OPEN) {
283
301
  this.#logger.warn('Trying to send client TTS event to a closed WebSocket');
284
302
  return;
285
303
  }
@@ -293,9 +311,9 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
293
311
  }
294
312
  };
295
313
 
296
- const createInputTask = async () => {
314
+ const createInputTask = async (signal: AbortSignal) => {
297
315
  for await (const data of this.input) {
298
- if (this.abortController.signal.aborted || closing) break;
316
+ if (signal.aborted || closing) break;
299
317
  if (data === SynthesizeStream.FLUSH_SENTINEL) {
300
318
  sendTokenizerStream.flush();
301
319
  continue;
@@ -308,55 +326,108 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
308
326
  }
309
327
  };
310
328
 
311
- const createSentenceStreamTask = async () => {
329
+ const createSentenceStreamTask = async (ws: WebSocket, signal: AbortSignal) => {
312
330
  for await (const ev of sendTokenizerStream) {
313
- if (this.abortController.signal.aborted) break;
314
-
315
- sendClientEvent({
316
- type: 'input_transcript',
317
- transcript: ev.token + ' ',
318
- });
331
+ if (signal.aborted || closing) break;
332
+
333
+ await sendClientEvent(
334
+ {
335
+ type: 'input_transcript',
336
+ transcript: ev.token + ' ',
337
+ },
338
+ ws,
339
+ signal,
340
+ );
341
+ inputSentEvent.set();
319
342
  }
320
343
 
321
- sendClientEvent({ type: 'session.flush' });
344
+ await sendClientEvent({ type: 'session.flush' }, ws, signal);
345
+ // needed in case empty input is sent
346
+ inputSentEvent.set();
322
347
  };
323
348
 
324
- const createWsListenerTask = async (ws: WebSocket) => {
325
- return new Promise<void>((resolve, reject) => {
326
- this.abortController.signal.addEventListener('abort', () => {
327
- resourceCleanup();
328
- resolve(); // Abort is triggered by close(), which is a normal shutdown, not an error
329
- });
330
-
331
- ws.on('message', async (data) => {
349
+ // Handles WebSocket message routing and error handling
350
+ // Completes based on protocol messages, NOT on ws.close()
351
+ const createWsListenerTask = async (ws: WebSocket, signal: AbortSignal) => {
352
+ const onMessage = (data: Buffer) => {
353
+ try {
332
354
  const eventJson = JSON.parse(data.toString()) as Record<string, unknown>;
333
355
  const validatedEvent = ttsServerEventSchema.parse(eventJson);
334
- eventChannel.write(validatedEvent);
335
- });
336
-
337
- ws.on('error', (e) => {
338
- this.#logger.error({ error: e }, 'WebSocket error');
339
- resourceCleanup();
340
- reject(e);
341
- });
342
-
343
- ws.on('close', () => {
344
- resourceCleanup();
345
-
346
- if (!closing) return this.#logger.error('WebSocket closed unexpectedly');
347
- if (finalReceived) return resolve();
356
+ // writer.write returns a promise; avoid unhandled rejections if stream is closed
357
+ void eventChannel.write(validatedEvent).catch((error) => {
358
+ this.#logger.debug(
359
+ { error },
360
+ 'Failed writing TTS event to stream channel (likely closed)',
361
+ );
362
+ });
363
+ } catch (e) {
364
+ this.#logger.error({ error: e }, 'Error parsing WebSocket message');
365
+ }
366
+ };
348
367
 
349
- reject(
368
+ const onError = (e: Error) => {
369
+ this.#logger.error({ error: e }, 'WebSocket error');
370
+ void resourceCleanup();
371
+ try {
372
+ // If the ws is misbehaving, hard-stop it immediately to avoid buffering.
373
+ ws.terminate?.();
374
+ } catch {
375
+ // ignore
376
+ }
377
+ // Ensure this ws is not reused
378
+ this.tts.pool.remove(ws);
379
+ completionFuture.reject(e);
380
+ };
381
+
382
+ const onClose = () => {
383
+ // WebSocket closed unexpectedly (not by us)
384
+ if (!closing) {
385
+ this.#logger.error('WebSocket closed unexpectedly');
386
+ void resourceCleanup();
387
+ // Ensure this ws is not reused
388
+ this.tts.pool.remove(ws);
389
+ completionFuture.reject(
350
390
  new APIStatusError({
351
391
  message: 'Gateway connection closed unexpectedly',
352
392
  options: { requestId },
353
393
  }),
354
394
  );
355
- });
356
- });
395
+ }
396
+ };
397
+
398
+ const onAbort = () => {
399
+ void resourceCleanup();
400
+ try {
401
+ // On interruption/abort, close the websocket immediately so the server stops streaming
402
+ // and the ws library doesn't buffer unread frames in memory.
403
+ ws.terminate?.();
404
+ } catch {
405
+ // ignore
406
+ }
407
+ this.tts.pool.remove(ws);
408
+ inputSentEvent.set();
409
+ completionFuture.resolve();
410
+ };
411
+
412
+ // Attach listeners
413
+ ws.on('message', onMessage);
414
+ ws.on('error', onError);
415
+ ws.on('close', onClose);
416
+ signal.addEventListener('abort', onAbort);
417
+
418
+ try {
419
+ // Wait for protocol-driven completion or error
420
+ await completionFuture.await;
421
+ } finally {
422
+ // IMPORTANT: Remove listeners so connection can be reused
423
+ ws.off('message', onMessage);
424
+ ws.off('error', onError);
425
+ ws.off('close', onClose);
426
+ signal.removeEventListener('abort', onAbort);
427
+ }
357
428
  };
358
429
 
359
- const createRecvTask = async () => {
430
+ const createRecvTask = async (signal: AbortSignal) => {
360
431
  let currentSessionId: string | null = null;
361
432
 
362
433
  const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
@@ -364,9 +435,11 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
364
435
  const reader = serverEventStream.getReader();
365
436
 
366
437
  try {
367
- while (!this.closed && !this.abortController.signal.aborted) {
438
+ await inputSentEvent.wait();
439
+
440
+ while (!this.closed && !signal.aborted) {
368
441
  const result = await reader.read();
369
- if (this.abortController.signal.aborted) return;
442
+ if (signal.aborted) return;
370
443
  if (result.done) return;
371
444
 
372
445
  const serverEvent = result.value;
@@ -382,24 +455,29 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
382
455
  }
383
456
  break;
384
457
  case 'done':
385
- finalReceived = true;
386
458
  for (const frame of bstream.flush()) {
387
459
  sendLastFrame(currentSessionId!, false);
388
460
  lastFrame = frame;
389
461
  }
390
462
  sendLastFrame(currentSessionId!, true);
391
463
  this.queue.put(SynthesizeStream.END_OF_STREAM);
392
- break;
464
+ await resourceCleanup();
465
+ completionFuture.resolve();
466
+ return;
393
467
  case 'session.closed':
394
- resourceCleanup();
395
- break;
468
+ await resourceCleanup();
469
+ completionFuture.resolve();
470
+ return;
396
471
  case 'error':
397
472
  this.#logger.error(
398
473
  { serverEvent },
399
474
  'Received error message from LiveKit TTS WebSocket',
400
475
  );
401
- resourceCleanup();
402
- throw new APIError(`LiveKit TTS returned error: ${serverEvent.message}`);
476
+ await resourceCleanup();
477
+ completionFuture.reject(
478
+ new APIError(`LiveKit TTS returned error: ${serverEvent.message}`),
479
+ );
480
+ return;
403
481
  default:
404
482
  this.#logger.warn('Unexpected message %s', serverEvent);
405
483
  break;
@@ -416,16 +494,81 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
416
494
  };
417
495
 
418
496
  try {
419
- ws = await this.tts.connectWs(this.connOptions.timeoutMs);
420
-
421
- await Promise.all([
422
- createInputTask(),
423
- createSentenceStreamTask(),
424
- createWsListenerTask(ws),
425
- createRecvTask(),
426
- ]);
497
+ await this.tts.pool.withConnection(
498
+ async (ws: WebSocket) => {
499
+ try {
500
+ // IMPORTANT: don't cancel the stream's controller on normal completion,
501
+ // otherwise the pool will remove+close the ws and every run becomes a pool miss.
502
+ const runController = new AbortController();
503
+ const onStreamAbort = () => runController.abort(this.abortController.signal.reason);
504
+ this.abortController.signal.addEventListener('abort', onStreamAbort, { once: true });
505
+
506
+ const tasks = [
507
+ Task.from(
508
+ async (controller) => {
509
+ const combined = combineSignals(runController.signal, controller.signal);
510
+ await createInputTask(combined);
511
+ },
512
+ undefined,
513
+ 'inference-tts-input',
514
+ ),
515
+ Task.from(
516
+ async (controller) => {
517
+ const combined = combineSignals(runController.signal, controller.signal);
518
+ await createSentenceStreamTask(ws, combined);
519
+ },
520
+ undefined,
521
+ 'inference-tts-sentence',
522
+ ),
523
+ Task.from(
524
+ async (controller) => {
525
+ const combined = combineSignals(runController.signal, controller.signal);
526
+ await createWsListenerTask(ws, combined);
527
+ },
528
+ undefined,
529
+ 'inference-tts-ws-listener',
530
+ ),
531
+ Task.from(
532
+ async (controller) => {
533
+ const combined = combineSignals(runController.signal, controller.signal);
534
+ await createRecvTask(combined);
535
+ },
536
+ undefined,
537
+ 'inference-tts-recv',
538
+ ),
539
+ ];
540
+
541
+ try {
542
+ await Promise.all(tasks.map((t) => t.result));
543
+ } finally {
544
+ // Mirror python finally: unblock recv and cancel all tasks.
545
+ inputSentEvent.set();
546
+ await resourceCleanup();
547
+ await cancelAndWait(tasks, 5000);
548
+ this.abortController.signal.removeEventListener('abort', onStreamAbort);
549
+ }
550
+ } catch (e) {
551
+ // If aborted, don't throw - let cleanup handle it
552
+ if (e instanceof Error && e.name === 'AbortError') {
553
+ return;
554
+ }
555
+ throw e;
556
+ }
557
+ },
558
+ {
559
+ timeout: this.connOptions.timeoutMs,
560
+ },
561
+ );
562
+ } catch (e) {
563
+ // Handle connection errors
564
+ if (e instanceof Error && e.name === 'AbortError') {
565
+ // Abort is expected during normal shutdown
566
+ return;
567
+ }
568
+ throw e;
427
569
  } finally {
428
- resourceCleanup();
570
+ // Ensure cleanup always runs (and don't leak the promise)
571
+ await resourceCleanup();
429
572
  }
430
573
  }
431
574
  }
package/src/utils.ts CHANGED
@@ -840,6 +840,31 @@ export async function waitForAbort(signal: AbortSignal) {
840
840
  return await abortFuture.await;
841
841
  }
842
842
 
843
/**
 * Combines two abort signals into a single abort signal.
 *
 * The returned signal aborts as soon as either input aborts and carries that input's
 * `reason`. If an input is already aborted when this function is called, the returned
 * signal is aborted immediately (with `a` taking precedence over `b`) and no listeners
 * are attached. Once the combined signal has aborted, the listeners registered on both
 * inputs are removed, so an input that outlives the combined signal does not keep the
 * combined controller and its callbacks reachable.
 *
 * @param a - The first abort signal.
 * @param b - The second abort signal.
 * @returns A new abort signal that is aborted when either of the input signals is aborted.
 */
export const combineSignals = (a: AbortSignal, b: AbortSignal): AbortSignal => {
  const combined = new AbortController();

  // `reason` is read through a narrow structural type rather than `any`, so this compiles
  // whether or not the ambient AbortSignal typings declare the property.
  const reasonOf = (source: AbortSignal): unknown =>
    (source as AbortSignal & { reason?: unknown }).reason;

  // Fast path: nothing to listen for when an input has already aborted.
  const alreadyAborted = a.aborted ? a : b.aborted ? b : undefined;
  if (alreadyAborted !== undefined) {
    combined.abort(reasonOf(alreadyAborted));
    return combined.signal;
  }

  const detachers: Array<() => void> = [];

  const forward = (source: AbortSignal): void => {
    const onAbort = (): void => {
      // Detach from BOTH inputs: `{ once: true }` only removes the listener that fired,
      // the one on the other input would otherwise stay attached until that input aborts.
      for (const detach of detachers) detach();
      if (!combined.signal.aborted) {
        combined.abort(reasonOf(source));
      }
    };
    source.addEventListener('abort', onAbort, { once: true });
    detachers.push(() => source.removeEventListener('abort', onAbort));
  };

  forward(a);
  forward(b);

  return combined.signal;
};
867
+
843
868
  export const isCloud = (url: URL) => {
844
869
  const hostname = url.hostname;
845
870
  return hostname.endsWith('.livekit.cloud') || hostname.endsWith('.livekit.run');
@@ -1449,6 +1449,13 @@ export class AgentActivity implements RecognitionHooks {
1449
1449
  { speech_id: speechHandle.id },
1450
1450
  'Aborting all pipeline reply tasks due to interruption',
1451
1451
  );
1452
+
1453
+ // Stop playout ASAP (don't wait for cancellations), otherwise the segment may finish and we
1454
+ // will correctly (but undesirably) commit a long transcript even though the user said "stop".
1455
+ if (audioOutput) {
1456
+ audioOutput.clearBuffer();
1457
+ }
1458
+
1452
1459
  replyAbortController.abort();
1453
1460
  await Promise.allSettled(
1454
1461
  tasks.map((task) => task.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT)),
@@ -1457,7 +1464,6 @@ export class AgentActivity implements RecognitionHooks {
1457
1464
  let forwardedText = textOut?.text || '';
1458
1465
 
1459
1466
  if (audioOutput) {
1460
- audioOutput.clearBuffer();
1461
1467
  const playbackEv = await audioOutput.waitForPlayout();
1462
1468
  if (audioOut?.firstFrameFut.done) {
1463
1469
  // playback EV is valid only if the first frame was already played
@@ -527,7 +527,10 @@ export class AgentSession<
527
527
  newAgentId: agent.id,
528
528
  }),
529
529
  );
530
- this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
530
+ this.logger.debug(
531
+ { previousAgentId: previousActivity?.agent.id, newAgentId: agent.id },
532
+ 'Agent handoff inserted into chat context',
533
+ );
531
534
 
532
535
  await this.activity.start();
533
536
 
@@ -51,7 +51,7 @@ export class DataStreamAudioOutput extends AudioOutput {
51
51
  #logger = log();
52
52
 
53
53
  constructor(opts: DataStreamAudioOutputOptions) {
54
- super(opts.sampleRate, undefined);
54
+ super(opts.sampleRate, undefined, { pause: false });
55
55
 
56
56
  const { room, destinationIdentity, sampleRate, waitRemoteTrack } = opts;
57
57
  this.room = room;