@livekit/agents 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. package/dist/_exceptions.cjs.map +1 -1
  2. package/dist/_exceptions.d.ts.map +1 -1
  3. package/dist/_exceptions.js.map +1 -1
  4. package/dist/audio.cjs +10 -0
  5. package/dist/audio.cjs.map +1 -1
  6. package/dist/audio.d.cts +1 -1
  7. package/dist/audio.d.ts +1 -1
  8. package/dist/audio.d.ts.map +1 -1
  9. package/dist/audio.js +10 -0
  10. package/dist/audio.js.map +1 -1
  11. package/dist/beta/workflows/task_group.cjs +7 -4
  12. package/dist/beta/workflows/task_group.cjs.map +1 -1
  13. package/dist/beta/workflows/task_group.d.ts.map +1 -1
  14. package/dist/beta/workflows/task_group.js +7 -4
  15. package/dist/beta/workflows/task_group.js.map +1 -1
  16. package/dist/inference/api_protos.d.cts +26 -26
  17. package/dist/inference/api_protos.d.ts +26 -26
  18. package/dist/inference/interruption/http_transport.cjs.map +1 -1
  19. package/dist/inference/interruption/http_transport.d.cts +3 -1
  20. package/dist/inference/interruption/http_transport.d.ts +3 -1
  21. package/dist/inference/interruption/http_transport.d.ts.map +1 -1
  22. package/dist/inference/interruption/http_transport.js.map +1 -1
  23. package/dist/inference/interruption/ws_transport.cjs +37 -32
  24. package/dist/inference/interruption/ws_transport.cjs.map +1 -1
  25. package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
  26. package/dist/inference/interruption/ws_transport.js +37 -32
  27. package/dist/inference/interruption/ws_transport.js.map +1 -1
  28. package/dist/inference/tts.cjs +14 -1
  29. package/dist/inference/tts.cjs.map +1 -1
  30. package/dist/inference/tts.d.cts +42 -4
  31. package/dist/inference/tts.d.ts +42 -4
  32. package/dist/inference/tts.d.ts.map +1 -1
  33. package/dist/inference/tts.js +24 -3
  34. package/dist/inference/tts.js.map +1 -1
  35. package/dist/inference/tts.test.cjs +72 -0
  36. package/dist/inference/tts.test.cjs.map +1 -1
  37. package/dist/inference/tts.test.js +72 -0
  38. package/dist/inference/tts.test.js.map +1 -1
  39. package/dist/ipc/job_proc_lazy_main.cjs +7 -2
  40. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  41. package/dist/ipc/job_proc_lazy_main.js +7 -2
  42. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  43. package/dist/ipc/supervised_proc.cjs +4 -1
  44. package/dist/ipc/supervised_proc.cjs.map +1 -1
  45. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  46. package/dist/ipc/supervised_proc.js +4 -1
  47. package/dist/ipc/supervised_proc.js.map +1 -1
  48. package/dist/ipc/supervised_proc.test.cjs +82 -0
  49. package/dist/ipc/supervised_proc.test.cjs.map +1 -1
  50. package/dist/ipc/supervised_proc.test.js +82 -0
  51. package/dist/ipc/supervised_proc.test.js.map +1 -1
  52. package/dist/job.cjs +2 -1
  53. package/dist/job.cjs.map +1 -1
  54. package/dist/job.d.ts.map +1 -1
  55. package/dist/job.js +2 -1
  56. package/dist/job.js.map +1 -1
  57. package/dist/llm/chat_context.cjs +102 -31
  58. package/dist/llm/chat_context.cjs.map +1 -1
  59. package/dist/llm/chat_context.d.ts.map +1 -1
  60. package/dist/llm/chat_context.js +102 -31
  61. package/dist/llm/chat_context.js.map +1 -1
  62. package/dist/llm/chat_context.test.cjs +123 -5
  63. package/dist/llm/chat_context.test.cjs.map +1 -1
  64. package/dist/llm/chat_context.test.js +123 -5
  65. package/dist/llm/chat_context.test.js.map +1 -1
  66. package/dist/llm/fallback_adapter.cjs +2 -0
  67. package/dist/llm/fallback_adapter.cjs.map +1 -1
  68. package/dist/llm/fallback_adapter.d.ts.map +1 -1
  69. package/dist/llm/fallback_adapter.js +2 -0
  70. package/dist/llm/fallback_adapter.js.map +1 -1
  71. package/dist/llm/index.cjs +2 -0
  72. package/dist/llm/index.cjs.map +1 -1
  73. package/dist/llm/index.d.cts +1 -1
  74. package/dist/llm/index.d.ts +1 -1
  75. package/dist/llm/index.d.ts.map +1 -1
  76. package/dist/llm/index.js +2 -0
  77. package/dist/llm/index.js.map +1 -1
  78. package/dist/llm/utils.cjs +89 -0
  79. package/dist/llm/utils.cjs.map +1 -1
  80. package/dist/llm/utils.d.cts +8 -0
  81. package/dist/llm/utils.d.ts +8 -0
  82. package/dist/llm/utils.d.ts.map +1 -1
  83. package/dist/llm/utils.js +88 -0
  84. package/dist/llm/utils.js.map +1 -1
  85. package/dist/llm/utils.test.cjs +90 -0
  86. package/dist/llm/utils.test.cjs.map +1 -1
  87. package/dist/llm/utils.test.js +98 -2
  88. package/dist/llm/utils.test.js.map +1 -1
  89. package/dist/stt/stt.cjs +8 -0
  90. package/dist/stt/stt.cjs.map +1 -1
  91. package/dist/stt/stt.d.cts +8 -0
  92. package/dist/stt/stt.d.ts +8 -0
  93. package/dist/stt/stt.d.ts.map +1 -1
  94. package/dist/stt/stt.js +8 -0
  95. package/dist/stt/stt.js.map +1 -1
  96. package/dist/tts/fallback_adapter.cjs +6 -0
  97. package/dist/tts/fallback_adapter.cjs.map +1 -1
  98. package/dist/tts/fallback_adapter.d.ts.map +1 -1
  99. package/dist/tts/fallback_adapter.js +6 -0
  100. package/dist/tts/fallback_adapter.js.map +1 -1
  101. package/dist/typed_promise.cjs +48 -0
  102. package/dist/typed_promise.cjs.map +1 -0
  103. package/dist/typed_promise.d.cts +24 -0
  104. package/dist/typed_promise.d.ts +24 -0
  105. package/dist/typed_promise.d.ts.map +1 -0
  106. package/dist/typed_promise.js +28 -0
  107. package/dist/typed_promise.js.map +1 -0
  108. package/dist/utils.cjs +30 -2
  109. package/dist/utils.cjs.map +1 -1
  110. package/dist/utils.d.cts +18 -0
  111. package/dist/utils.d.ts +18 -0
  112. package/dist/utils.d.ts.map +1 -1
  113. package/dist/utils.js +27 -2
  114. package/dist/utils.js.map +1 -1
  115. package/dist/version.cjs +1 -1
  116. package/dist/version.js +1 -1
  117. package/dist/voice/agent_activity.cjs +10 -0
  118. package/dist/voice/agent_activity.cjs.map +1 -1
  119. package/dist/voice/agent_activity.d.ts.map +1 -1
  120. package/dist/voice/agent_activity.js +11 -0
  121. package/dist/voice/agent_activity.js.map +1 -1
  122. package/dist/voice/agent_session.cjs +1 -1
  123. package/dist/voice/agent_session.cjs.map +1 -1
  124. package/dist/voice/agent_session.d.cts +4 -2
  125. package/dist/voice/agent_session.d.ts +4 -2
  126. package/dist/voice/agent_session.d.ts.map +1 -1
  127. package/dist/voice/agent_session.js +1 -1
  128. package/dist/voice/agent_session.js.map +1 -1
  129. package/dist/voice/events.cjs +11 -0
  130. package/dist/voice/events.cjs.map +1 -1
  131. package/dist/voice/events.d.cts +12 -1
  132. package/dist/voice/events.d.ts +12 -1
  133. package/dist/voice/events.d.ts.map +1 -1
  134. package/dist/voice/events.js +10 -0
  135. package/dist/voice/events.js.map +1 -1
  136. package/dist/voice/generation.cjs +23 -4
  137. package/dist/voice/generation.cjs.map +1 -1
  138. package/dist/voice/generation.d.ts.map +1 -1
  139. package/dist/voice/generation.js +32 -5
  140. package/dist/voice/generation.js.map +1 -1
  141. package/dist/voice/generation_tts_timeout.test.cjs +85 -0
  142. package/dist/voice/generation_tts_timeout.test.cjs.map +1 -0
  143. package/dist/voice/generation_tts_timeout.test.js +84 -0
  144. package/dist/voice/generation_tts_timeout.test.js.map +1 -0
  145. package/dist/voice/index.cjs.map +1 -1
  146. package/dist/voice/index.d.cts +1 -1
  147. package/dist/voice/index.d.ts +1 -1
  148. package/dist/voice/index.d.ts.map +1 -1
  149. package/dist/voice/index.js +3 -1
  150. package/dist/voice/index.js.map +1 -1
  151. package/dist/voice/recorder_io/recorder_io.cjs +1 -2
  152. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  153. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  154. package/dist/voice/recorder_io/recorder_io.js +2 -3
  155. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  156. package/dist/voice/report.cjs +1 -1
  157. package/dist/voice/report.cjs.map +1 -1
  158. package/dist/voice/report.js +1 -1
  159. package/dist/voice/report.js.map +1 -1
  160. package/dist/voice/report.test.cjs +70 -0
  161. package/dist/voice/report.test.cjs.map +1 -1
  162. package/dist/voice/report.test.js +70 -0
  163. package/dist/voice/report.test.js.map +1 -1
  164. package/dist/voice/room_io/room_io.cjs +5 -1
  165. package/dist/voice/room_io/room_io.cjs.map +1 -1
  166. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  167. package/dist/voice/room_io/room_io.js +5 -1
  168. package/dist/voice/room_io/room_io.js.map +1 -1
  169. package/dist/voice/room_io/room_io.test.cjs +18 -0
  170. package/dist/voice/room_io/room_io.test.cjs.map +1 -0
  171. package/dist/voice/room_io/room_io.test.js +17 -0
  172. package/dist/voice/room_io/room_io.test.js.map +1 -0
  173. package/package.json +4 -2
  174. package/src/_exceptions.ts +5 -0
  175. package/src/audio.ts +12 -1
  176. package/src/beta/workflows/task_group.ts +14 -5
  177. package/src/inference/interruption/http_transport.ts +2 -1
  178. package/src/inference/interruption/ws_transport.ts +44 -34
  179. package/src/inference/tts.test.ts +87 -0
  180. package/src/inference/tts.ts +71 -9
  181. package/src/ipc/job_proc_lazy_main.ts +7 -2
  182. package/src/ipc/supervised_proc.test.ts +96 -0
  183. package/src/ipc/supervised_proc.ts +8 -1
  184. package/src/job.ts +1 -0
  185. package/src/llm/chat_context.test.ts +137 -5
  186. package/src/llm/chat_context.ts +119 -38
  187. package/src/llm/fallback_adapter.ts +5 -2
  188. package/src/llm/index.ts +2 -0
  189. package/src/llm/utils.test.ts +103 -2
  190. package/src/llm/utils.ts +128 -0
  191. package/src/stt/stt.ts +9 -1
  192. package/src/tts/fallback_adapter.ts +9 -2
  193. package/src/typed_promise.ts +67 -0
  194. package/src/utils.ts +45 -2
  195. package/src/voice/agent_activity.ts +11 -0
  196. package/src/voice/agent_session.ts +13 -7
  197. package/src/voice/events.ts +21 -0
  198. package/src/voice/generation.ts +35 -8
  199. package/src/voice/generation_tts_timeout.test.ts +112 -0
  200. package/src/voice/index.ts +6 -1
  201. package/src/voice/recorder_io/recorder_io.ts +2 -7
  202. package/src/voice/report.test.ts +78 -0
  203. package/src/voice/report.ts +1 -1
  204. package/src/voice/room_io/room_io.test.ts +38 -0
  205. package/src/voice/room_io/room_io.ts +7 -2
package/src/llm/utils.ts CHANGED
@@ -7,6 +7,7 @@ import sharp from 'sharp';
7
7
  import type { UnknownUserData } from '../voice/run_context.js';
8
8
  import type { ChatContext } from './chat_context.js';
9
9
  import {
10
+ type ChatContent,
10
11
  type ChatItem,
11
12
  FunctionCall,
12
13
  FunctionCallOutput,
@@ -241,6 +242,133 @@ export async function executeToolCall(
241
242
  }
242
243
  }
243
244
 
245
+ export interface FormatChatHistoryOptions {
246
+ includeIds?: boolean;
247
+ includeTimestamps?: boolean;
248
+ }
249
+
250
+ /**
251
+ * Render a chat context into a readable multiline string for debugging and logging.
252
+ */
253
+ export function formatChatHistory(
254
+ chatCtx: ChatContext,
255
+ options: FormatChatHistoryOptions = {},
256
+ ): string {
257
+ const { includeIds = false, includeTimestamps = false } = options;
258
+
259
+ if (chatCtx.items.length === 0) {
260
+ return 'Chat history (0 items)';
261
+ }
262
+
263
+ const formattedItems = chatCtx.items.map((item, index) =>
264
+ formatChatHistoryItem(item, index, {
265
+ includeIds,
266
+ includeTimestamps,
267
+ }),
268
+ );
269
+
270
+ return [
271
+ `Chat history (${chatCtx.items.length} items)`,
272
+ ...formattedItems.flatMap((item) => ['', item]),
273
+ ].join('\n');
274
+ }
275
+
276
+ function formatChatHistoryItem(
277
+ item: ChatItem,
278
+ index: number,
279
+ options: Required<FormatChatHistoryOptions>,
280
+ ): string {
281
+ const headerParts = [`[${index}]`];
282
+
283
+ if (item.type === 'message') {
284
+ headerParts.push('message', item.role);
285
+ } else if (item.type === 'function_call') {
286
+ headerParts.push('function_call', item.name, `call_id=${item.callId}`);
287
+ } else if (item.type === 'function_call_output') {
288
+ headerParts.push('function_call_output', item.name || '(unnamed)', `call_id=${item.callId}`);
289
+ if (item.isError) {
290
+ headerParts.push('error=true');
291
+ }
292
+ } else {
293
+ headerParts.push('agent_handoff');
294
+ }
295
+
296
+ if (options.includeIds) {
297
+ headerParts.push(`id=${item.id}`);
298
+ }
299
+
300
+ if (options.includeTimestamps) {
301
+ headerParts.push(`created_at=${item.createdAt.toFixed(3)}`);
302
+ }
303
+
304
+ const body = formatChatHistoryItemBody(item);
305
+ if (!body) {
306
+ return headerParts.join(' ');
307
+ }
308
+
309
+ return `${headerParts.join(' ')}\n${indentBlock(body, ' ')}`;
310
+ }
311
+
312
+ function formatChatHistoryItemBody(item: ChatItem): string {
313
+ if (item.type === 'message') {
314
+ const content = item.content.map((part) => formatMessageContentPart(part)).join('\n');
315
+ return content.trim() ? content : '(empty)';
316
+ }
317
+
318
+ if (item.type === 'function_call') {
319
+ return prettyJsonText(item.args);
320
+ }
321
+
322
+ if (item.type === 'function_call_output') {
323
+ return prettyJsonText(item.output);
324
+ }
325
+
326
+ return `${item.oldAgentId ?? '(none)'} -> ${item.newAgentId}`;
327
+ }
328
+
329
+ function formatMessageContentPart(part: ChatContent): string {
330
+ if (typeof part === 'string') {
331
+ return part;
332
+ }
333
+
334
+ if (part.type === 'image_content') {
335
+ if (typeof part.image === 'string') {
336
+ return `[image url=${truncateText(part.image, 120)}]`;
337
+ }
338
+
339
+ return `[image frame=${part.image.width}x${part.image.height}]`;
340
+ }
341
+
342
+ if (part.transcript) {
343
+ return `[audio transcript=${JSON.stringify(truncateText(part.transcript, 120))}]`;
344
+ }
345
+
346
+ return `[audio frames=${part.frame.length}]`;
347
+ }
348
+
349
+ function prettyJsonText(text: string): string {
350
+ try {
351
+ return JSON.stringify(JSON.parse(text), null, 2);
352
+ } catch {
353
+ return text;
354
+ }
355
+ }
356
+
357
+ function truncateText(text: string, maxLength: number): string {
358
+ if (text.length <= maxLength) {
359
+ return text;
360
+ }
361
+
362
+ return `${text.slice(0, Math.max(0, maxLength - 3))}...`;
363
+ }
364
+
365
+ function indentBlock(text: string, indent: string): string {
366
+ return text
367
+ .split('\n')
368
+ .map((line) => `${indent}${line}`)
369
+ .join('\n');
370
+ }
371
+
244
372
  /**
245
373
  * Standard dynamic-programming LCS to get the common subsequence
246
374
  * of IDs (in order) that appear in both old_ids and new_ids.
package/src/stt/stt.ts CHANGED
@@ -248,7 +248,15 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
248
248
  startSoon(() => this.mainTask().finally(() => this.queue.close()));
249
249
  }
250
250
 
251
- private async mainTask() {
251
+ /**
252
+ * Runs the STT with retry logic. Errors are emitted via {@link STT} error events
253
+ * and then re-thrown to trigger `.finally()` cleanup.
254
+ *
255
+ * @throws {APIError} When the STT request fails with a non-retryable error
256
+ * @throws {APIConnectionError} When all retry attempts are exhausted
257
+ * @internal Not annotated with Throws<> because this is fire-and-forget via startSoon()
258
+ */
259
+ private async mainTask(): Promise<void> {
252
260
  for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
253
261
  try {
254
262
  return await this.run();
package/src/tts/fallback_adapter.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { AudioResampler } from '@livekit/rtc-node';
5
+ import type { Throws } from '@livekit/throws-transformer/throws';
5
6
  import { APIConnectionError, APIError } from '../_exceptions.js';
6
7
  import { log } from '../log.js';
7
8
  import { basic } from '../tokenize/index.js';
@@ -306,7 +307,10 @@ class FallbackChunkedStream extends ChunkedStream {
306
307
  this.connOptions = connOptions;
307
308
  }
308
309
 
309
- protected async run(): Promise<void> {
310
+ /**
311
+ * @throws {APIConnectionError} When all TTS providers have been exhausted
312
+ */
313
+ protected async run(): Promise<Throws<void, APIConnectionError>> {
310
314
  const allTTSFailed = this.adapter.status.every((s) => !s.available);
311
315
  let lastRequestId: string = '';
312
316
  let lastSegmentId: string = '';
@@ -406,7 +410,10 @@ class FallbackSynthesizeStream extends SynthesizeStream {
406
410
  this.adapter = adapter;
407
411
  }
408
412
 
409
- protected async run(): Promise<void> {
413
+ /**
414
+ * @throws {APIConnectionError} When all TTS providers have been exhausted
415
+ */
416
+ protected async run(): Promise<Throws<void, APIConnectionError>> {
410
417
  const allTTSFailed = this.adapter.status.every((s) => !s.available);
411
418
  if (allTTSFailed) {
412
419
  this._logger.warn('All fallback TTS instances failed, retrying from first...');
package/src/typed_promise.ts ADDED
@@ -0,0 +1,67 @@
1
+ // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ type InferErrors<T> = T extends TypedPromise<any, infer E> ? E : never;
5
+
6
+ interface PromiseRejectedResult<E> {
7
+ status: 'rejected';
8
+ reason: E;
9
+ }
10
+
11
+ type SettledResult<T> =
12
+ T extends TypedPromise<infer U, infer E>
13
+ ? PromiseFulfilledResult<U> | PromiseRejectedResult<E>
14
+ : T extends PromiseLike<infer U>
15
+ ? PromiseFulfilledResult<U> | PromiseRejectedResult<unknown>
16
+ : PromiseFulfilledResult<T> | PromiseRejectedResult<unknown>;
17
+
18
+ export default class TypedPromise<T, E extends Error> extends Promise<T> {
19
+ // eslint-disable-next-line @typescript-eslint/no-useless-constructor
20
+ constructor(
21
+ executor: (resolve: (value: T | PromiseLike<T>) => void, reject: (reason: E) => void) => void,
22
+ ) {
23
+ super(executor);
24
+ }
25
+
26
+ catch<TResult = never>(
27
+ onrejected?: ((reason: E) => TResult | PromiseLike<TResult>) | null | undefined,
28
+ ): TypedPromise<T | TResult, E> {
29
+ return super.catch(onrejected);
30
+ }
31
+
32
+ static resolve: {
33
+ (): TypedPromise<void, never>;
34
+ <V>(value: V): TypedPromise<Awaited<V>, never>;
35
+ } = <V>(value?: V): TypedPromise<Awaited<V>, never> => {
36
+ return super.resolve(value) as TypedPromise<Awaited<V>, never>;
37
+ };
38
+
39
+ static reject<E extends Error>(reason: E): TypedPromise<never, E> {
40
+ return super.reject(reason);
41
+ }
42
+
43
+ static all<T extends readonly unknown[] | []>(
44
+ values: T,
45
+ ): TypedPromise<{ -readonly [P in keyof T]: Awaited<T[P]> }, InferErrors<T[number]>> {
46
+ return super.all(values) as any;
47
+ }
48
+
49
+ static allSettled<T extends readonly unknown[] | []>(
50
+ values: T,
51
+ ): TypedPromise<{ -readonly [P in keyof T]: SettledResult<T[P]> }, never> {
52
+ return super.allSettled(values) as any;
53
+ }
54
+
55
+ static race<T extends readonly (TypedPromise<any, any> | any)[]>(
56
+ values: T,
57
+ ): TypedPromise<
58
+ T[number] extends TypedPromise<infer U, any>
59
+ ? U
60
+ : T[number] extends PromiseLike<infer U>
61
+ ? U
62
+ : Awaited<T[number]>,
63
+ InferErrors<T[number]>
64
+ > {
65
+ return super.race(values);
66
+ }
67
+ }
package/src/utils.ts CHANGED
@@ -9,6 +9,7 @@ import type {
9
9
  TrackKind,
10
10
  } from '@livekit/rtc-node';
11
11
  import { AudioFrame, AudioResampler, RoomEvent } from '@livekit/rtc-node';
12
+ import type { Throws } from '@livekit/throws-transformer/throws';
12
13
  import { AsyncLocalStorage } from 'node:async_hooks';
13
14
  import { EventEmitter, once } from 'node:events';
14
15
  import type { ReadableStream } from 'node:stream/web';
@@ -752,6 +753,21 @@ export function isStreamClosedError(error: unknown): boolean {
752
753
  );
753
754
  }
754
755
 
756
+ /** FFmpeg error messages expected during normal teardown/shutdown. */
757
+ const FFMPEG_TEARDOWN_ERRORS = ['Output stream closed', 'received signal 2', 'SIGKILL', 'SIGINT'];
758
+
759
+ /**
760
+ * Check if an error is an expected FFmpeg teardown error that can be safely ignored during cleanup.
761
+ *
762
+ * @param error - The error to check.
763
+ * @returns True if the error is an expected FFmpeg shutdown error.
764
+ */
765
+ export function isFfmpegTeardownError(error: unknown): boolean {
766
+ return (
767
+ error instanceof Error && FFMPEG_TEARDOWN_ERRORS.some((msg) => error.message?.includes(msg))
768
+ );
769
+ }
770
+
755
771
  /**
756
772
  * In JS an error can be any arbitrary value.
757
773
  * This function converts an unknown error to an Error and stores the original value in the error object.
@@ -789,11 +805,11 @@ export type DelayOptions = {
789
805
  */
790
806
  export function delay(ms: number, options: DelayOptions = {}): Promise<void> {
791
807
  const { signal } = options;
792
- if (signal?.aborted) return Promise.reject(signal.reason);
808
+ if (signal?.aborted) return Promise.reject(signal.reason ?? new Error('delay aborted'));
793
809
  return new Promise((resolve, reject) => {
794
810
  const abort = () => {
795
811
  clearTimeout(i);
796
- reject(signal?.reason);
812
+ reject(signal?.reason ?? new Error('delay aborted'));
797
813
  };
798
814
  const done = () => {
799
815
  signal?.removeEventListener('abort', abort);
@@ -804,6 +820,33 @@ export function delay(ms: number, options: DelayOptions = {}): Promise<void> {
804
820
  });
805
821
  }
806
822
 
823
+ export class IdleTimeoutError extends Error {
824
+ constructor(message = 'idle timeout') {
825
+ super(message);
826
+ this.name = 'IdleTimeoutError';
827
+ }
828
+ }
829
+
830
+ /**
831
+ * Race a promise against an idle timeout. If the promise does not settle within
832
+ * `timeoutMs` milliseconds, the returned promise rejects with {@link IdleTimeoutError}
833
+ * (or the error returned by `throwError` when provided).
834
+ * The timer is properly cleaned up on settlement to avoid leaking handles.
835
+ */
836
+ export function waitUntilTimeout<T, E extends Error = IdleTimeoutError>(
837
+ promise: Promise<T>,
838
+ timeoutMs: number,
839
+ throwError?: () => E,
840
+ ): Promise<Throws<T, E>> {
841
+ let timer: ReturnType<typeof setTimeout> | undefined;
842
+ return Promise.race([
843
+ promise,
844
+ new Promise<never>((_, reject) => {
845
+ timer = setTimeout(() => reject(throwError?.() ?? new IdleTimeoutError()), timeoutMs);
846
+ }),
847
+ ]).finally(() => clearTimeout(timer)) as Promise<Throws<T, E>>;
848
+ }
849
+
807
850
  /**
808
851
  * Returns a participant that matches the given identity. If identity is None, the first
809
852
  * participant that joins the room will be returned.
package/src/voice/agent_activity.ts CHANGED
@@ -67,6 +67,7 @@ import {
67
67
  createErrorEvent,
68
68
  createFunctionToolsExecutedEvent,
69
69
  createMetricsCollectedEvent,
70
+ createSessionUsageUpdatedEvent,
70
71
  createSpeechCreatedEvent,
71
72
  createUserInputTranscribedEvent,
72
73
  } from './events.js';
@@ -157,10 +158,15 @@ export class AgentActivity implements RecognitionHooks {
157
158
 
158
159
  private readonly onInterruptionMetricsCollected = (ev: InterruptionMetrics): void => {
159
160
  this.agentSession._usageCollector.collect(ev);
161
+ const usage = this.agentSession.usage;
160
162
  this.agentSession.emit(
161
163
  AgentSessionEventTypes.MetricsCollected,
162
164
  createMetricsCollectedEvent({ metrics: ev }),
163
165
  );
166
+ this.agentSession.emit(
167
+ AgentSessionEventTypes.SessionUsageUpdated,
168
+ createSessionUsageUpdatedEvent({ usage }),
169
+ );
164
170
  };
165
171
 
166
172
  private readonly onInterruptionError = (ev: InterruptionDetectionError): void => {
@@ -730,11 +736,16 @@ export class AgentActivity implements RecognitionHooks {
730
736
  }
731
737
 
732
738
  this.agentSession._usageCollector.collect(ev);
739
+ const usage = this.agentSession.usage;
733
740
 
734
741
  this.agentSession.emit(
735
742
  AgentSessionEventTypes.MetricsCollected,
736
743
  createMetricsCollectedEvent({ metrics: ev }),
737
744
  );
745
+ this.agentSession.emit(
746
+ AgentSessionEventTypes.SessionUsageUpdated,
747
+ createSessionUsageUpdatedEvent({ usage }),
748
+ );
738
749
  };
739
750
 
740
751
  private onError(ev: RealtimeModelError | STTError | TTSError | LLMError): void {
package/src/voice/agent_session.ts CHANGED
@@ -52,6 +52,7 @@ import {
52
52
  type ErrorEvent,
53
53
  type FunctionToolsExecutedEvent,
54
54
  type MetricsCollectedEvent,
55
+ type SessionUsageUpdatedEvent,
55
56
  type ShutdownReason,
56
57
  type SpeechCreatedEvent,
57
58
  type UserInputTranscribedEvent,
@@ -131,6 +132,7 @@ export type AgentSessionCallbacks = {
131
132
  [AgentSessionEventTypes.ConversationItemAdded]: (ev: ConversationItemAddedEvent) => void;
132
133
  [AgentSessionEventTypes.FunctionToolsExecuted]: (ev: FunctionToolsExecutedEvent) => void;
133
134
  [AgentSessionEventTypes.MetricsCollected]: (ev: MetricsCollectedEvent) => void;
135
+ [AgentSessionEventTypes.SessionUsageUpdated]: (ev: SessionUsageUpdatedEvent) => void;
134
136
  [AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void;
135
137
  [AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void;
136
138
  [AgentSessionEventTypes.Close]: (ev: CloseEvent) => void;
@@ -649,7 +651,8 @@ export class AgentSession<
649
651
  }
650
652
 
651
653
  generateReply(options?: {
652
- userInput?: string;
654
+ userInput?: string | ChatMessage;
655
+ chatCtx?: ChatContext;
653
656
  instructions?: string;
654
657
  toolChoice?: ToolChoice;
655
658
  allowInterruptions?: boolean;
@@ -658,12 +661,15 @@ export class AgentSession<
658
661
  throw new Error('AgentSession is not running');
659
662
  }
660
663
 
661
- const userMessage = options?.userInput
662
- ? new ChatMessage({
663
- role: 'user',
664
- content: options.userInput,
665
- })
666
- : undefined;
664
+ const userMessage =
665
+ options?.userInput instanceof ChatMessage
666
+ ? options.userInput
667
+ : options?.userInput
668
+ ? new ChatMessage({
669
+ role: 'user',
670
+ content: options.userInput,
671
+ })
672
+ : undefined;
667
673
 
668
674
  const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
669
675
  if (activity.schedulingPaused) {
package/src/voice/events.ts CHANGED
@@ -18,6 +18,7 @@ import type { STT } from '../stt/index.js';
18
18
  import type { STTError } from '../stt/stt.js';
19
19
  import type { TTS } from '../tts/index.js';
20
20
  import type { TTSError } from '../tts/tts.js';
21
+ import type { AgentSessionUsage } from './agent_session.js';
21
22
  import type { SpeechHandle } from './speech_handle.js';
22
23
 
23
24
  export enum AgentSessionEventTypes {
@@ -27,6 +28,7 @@ export enum AgentSessionEventTypes {
27
28
  ConversationItemAdded = 'conversation_item_added',
28
29
  FunctionToolsExecuted = 'function_tools_executed',
29
30
  MetricsCollected = 'metrics_collected',
31
+ SessionUsageUpdated = 'session_usage_updated',
30
32
  SpeechCreated = 'speech_created',
31
33
  OverlappingSpeech = 'overlapping_speech',
32
34
  Error = 'error',
@@ -133,6 +135,24 @@ export const createMetricsCollectedEvent = ({
133
135
  createdAt,
134
136
  });
135
137
 
138
+ export type SessionUsageUpdatedEvent = {
139
+ type: 'session_usage_updated';
140
+ usage: AgentSessionUsage;
141
+ createdAt: number;
142
+ };
143
+
144
+ export const createSessionUsageUpdatedEvent = ({
145
+ usage,
146
+ createdAt = Date.now(),
147
+ }: {
148
+ usage: AgentSessionUsage;
149
+ createdAt?: number;
150
+ }): SessionUsageUpdatedEvent => ({
151
+ type: 'session_usage_updated',
152
+ usage,
153
+ createdAt,
154
+ });
155
+
136
156
  export type ConversationItemAddedEvent = {
137
157
  type: 'conversation_item_added';
138
158
  item: ChatMessage;
@@ -264,6 +284,7 @@ export type AgentEvent =
264
284
  | UserStateChangedEvent
265
285
  | AgentStateChangedEvent
266
286
  | MetricsCollectedEvent
287
+ | SessionUsageUpdatedEvent
267
288
  | ConversationItemAddedEvent
268
289
  | FunctionToolsExecutedEvent
269
290
  | SpeechCreatedEvent
package/src/voice/generation.ts CHANGED
@@ -25,7 +25,15 @@ import { log } from '../log.js';
25
25
  import { IdentityTransform } from '../stream/identity_transform.js';
26
26
  import { traceTypes, tracer } from '../telemetry/index.js';
27
27
  import { USERDATA_TIMED_TRANSCRIPT } from '../types.js';
28
- import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
28
+ import {
29
+ Future,
30
+ IdleTimeoutError,
31
+ Task,
32
+ shortuuid,
33
+ toError,
34
+ waitForAbort,
35
+ waitUntilTimeout,
36
+ } from '../utils.js';
29
37
  import {
30
38
  type Agent,
31
39
  type ModelSettings,
@@ -46,6 +54,8 @@ import {
46
54
  import { RunContext } from './run_context.js';
47
55
  import type { SpeechHandle } from './speech_handle.js';
48
56
 
57
+ const TTS_READ_IDLE_TIMEOUT_MS = 10_000;
58
+
49
59
  /** @internal */
50
60
  export class _LLMGenerationData {
51
61
  generatedText: string = '';
@@ -550,6 +560,7 @@ export function performTTSInference(
550
560
  model?: string,
551
561
  provider?: string,
552
562
  ): [Task<void>, _TTSGenerationData] {
563
+ const logger = log();
553
564
  const audioStream = new IdentityTransform<AudioFrame>();
554
565
  const outputWriter = audioStream.writable.getWriter();
555
566
  const audioOutputStream = audioStream.readable;
@@ -624,12 +635,15 @@ export function performTTSInference(
624
635
  // JS currently only does single inference, so initialPushedDuration is always 0.
625
636
  // TODO: Add FlushSentinel + multi-segment loop
626
637
  const initialPushedDuration = pushedDuration;
627
-
628
638
  while (true) {
629
639
  if (signal.aborted) {
630
640
  break;
631
641
  }
632
- const { done, value: frame } = await ttsStreamReader.read();
642
+
643
+ const { done, value: frame } = await waitUntilTimeout(
644
+ ttsStreamReader.read(),
645
+ TTS_READ_IDLE_TIMEOUT_MS,
646
+ );
633
647
  if (done) {
634
648
  break;
635
649
  }
@@ -671,14 +685,15 @@ export function performTTSInference(
671
685
  pushedDuration += frameDuration;
672
686
  }
673
687
  } catch (error) {
674
- if (error instanceof DOMException && error.name === 'AbortError') {
675
- // Abort signal was triggered, handle gracefully
688
+ if (error instanceof IdleTimeoutError) {
689
+ logger.warn('TTS stream stalled after producing audio, forcing close');
690
+ } else if (error instanceof DOMException && error.name === 'AbortError') {
676
691
  return;
692
+ } else {
693
+ throw error;
677
694
  }
678
- throw error;
679
695
  } finally {
680
696
  if (!timedTextsFut.done) {
681
- // Ensure downstream consumers don't hang on errors.
682
697
  timedTextsFut.resolve(null);
683
698
  }
684
699
  ttsStreamReader?.releaseLock();
@@ -773,9 +788,12 @@ async function forwardAudio(
773
788
  out: _AudioOut,
774
789
  signal?: AbortSignal,
775
790
  ): Promise<void> {
791
+ const logger = log();
776
792
  const reader = ttsStream.getReader();
777
793
  let resampler: AudioResampler | null = null;
778
794
 
795
+ const FORWARD_AUDIO_IDLE_TIMEOUT_MS = 10_000;
796
+
779
797
  const onPlaybackStarted = (ev: { createdAt: number }) => {
780
798
  if (!out.firstFrameFut.done) {
781
799
  out.firstFrameFut.resolve(ev.createdAt);
@@ -791,7 +809,10 @@ async function forwardAudio(
791
809
  break;
792
810
  }
793
811
 
794
- const { done, value: frame } = await reader.read();
812
+ const { done, value: frame } = await waitUntilTimeout(
813
+ reader.read(),
814
+ FORWARD_AUDIO_IDLE_TIMEOUT_MS,
815
+ );
795
816
  if (done) break;
796
817
 
797
818
  out.audio.push(frame);
@@ -819,6 +840,12 @@ async function forwardAudio(
819
840
  await audioOutput.captureFrame(f);
820
841
  }
821
842
  }
843
+ } catch (e) {
844
+ if (e instanceof IdleTimeoutError) {
845
+ logger.warn('audio forwarding stalled waiting for TTS frames, forcing close');
846
+ } else {
847
+ throw e;
848
+ }
822
849
  } finally {
823
850
  audioOutput.off(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
824
851
 
package/src/voice/generation_tts_timeout.test.ts ADDED
@@ -0,0 +1,112 @@
1
+ // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { AudioFrame } from '@livekit/rtc-node';
5
+ import { ReadableStream } from 'stream/web';
6
+ import { describe, expect, it, vi } from 'vitest';
7
+ import { initializeLogger } from '../log.js';
8
+ import { performAudioForwarding, performTTSInference } from './generation.js';
9
+ import { AudioOutput } from './io.js';
10
+
11
+ function createSilentFrame(sampleRate = 24000, channels = 1, durationMs = 20): AudioFrame {
12
+ const samplesPerChannel = Math.floor((sampleRate * durationMs) / 1000);
13
+ const data = new Int16Array(samplesPerChannel * channels);
14
+ return new AudioFrame(data, sampleRate, channels, samplesPerChannel);
15
+ }
16
+
17
+ class MockAudioOutput extends AudioOutput {
18
+ capturedFrames: AudioFrame[] = [];
19
+
20
+ constructor() {
21
+ super(24000);
22
+ }
23
+
24
+ async captureFrame(frame: AudioFrame): Promise<void> {
25
+ await super.captureFrame(frame);
26
+ this.capturedFrames.push(frame);
27
+ this.onPlaybackStarted(Date.now());
28
+ }
29
+
30
+ clearBuffer(): void {
31
+ // no-op for mock
32
+ }
33
+ }
34
+
35
+ describe('TTS stream idle timeout', () => {
36
+ initializeLogger({ pretty: false, level: 'silent' });
37
+
38
+ it('forwardAudio completes when TTS stream stalls after producing frames', async () => {
39
+ const stalledStream = new ReadableStream<AudioFrame>({
40
+ start(controller) {
41
+ controller.enqueue(createSilentFrame());
42
+ controller.enqueue(createSilentFrame());
43
+ },
44
+ });
45
+
46
+ const audioOutput = new MockAudioOutput();
47
+ const controller = new AbortController();
48
+
49
+ const [task, audioOut] = performAudioForwarding(stalledStream, audioOutput, controller);
50
+
51
+ vi.useFakeTimers();
52
+
53
+ const taskPromise = task.result;
54
+ await vi.advanceTimersByTimeAsync(11_000);
55
+ await taskPromise;
56
+
57
+ vi.useRealTimers();
58
+
59
+ expect(audioOutput.capturedFrames.length).toBe(2);
60
+ expect(audioOut.firstFrameFut.done).toBe(true);
61
+ }, 10_000);
62
+
63
+ it('forwardAudio completes normally when TTS stream closes properly', async () => {
64
+ const normalStream = new ReadableStream<AudioFrame>({
65
+ start(controller) {
66
+ controller.enqueue(createSilentFrame());
67
+ controller.enqueue(createSilentFrame());
68
+ controller.enqueue(createSilentFrame());
69
+ controller.close();
70
+ },
71
+ });
72
+
73
+ const audioOutput = new MockAudioOutput();
74
+ const controller = new AbortController();
75
+
76
+ const [task, audioOut] = performAudioForwarding(normalStream, audioOutput, controller);
77
+
78
+ await task.result;
79
+
80
+ expect(audioOutput.capturedFrames.length).toBe(3);
81
+ expect(audioOut.firstFrameFut.done).toBe(true);
82
+ });
83
+
84
+ it('performTTSInference completes when TTS node returns stalled stream', async () => {
85
+ const stalledTtsStream = new ReadableStream<AudioFrame>({
86
+ start(controller) {
87
+ controller.enqueue(createSilentFrame());
88
+ },
89
+ });
90
+
91
+ const ttsNode = async () => stalledTtsStream;
92
+ const textInput = new ReadableStream<string>({
93
+ start(controller) {
94
+ controller.enqueue('Hello world');
95
+ controller.close();
96
+ },
97
+ });
98
+
99
+ const controller = new AbortController();
100
+ const [task, genData] = performTTSInference(ttsNode, textInput, {}, controller);
101
+
102
+ vi.useFakeTimers();
103
+
104
+ const taskPromise = task.result;
105
+ await vi.advanceTimersByTimeAsync(11_000);
106
+ await taskPromise;
107
+
108
+ vi.useRealTimers();
109
+
110
+ expect(genData.ttfb).toBeDefined();
111
+ }, 10_000);
112
+ });