@livekit/agents 1.0.24 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/dist/inference/llm.cjs +1 -2
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.ts.map +1 -1
  4. package/dist/inference/llm.js +1 -2
  5. package/dist/inference/llm.js.map +1 -1
  6. package/dist/inference/stt.cjs +1 -1
  7. package/dist/inference/stt.cjs.map +1 -1
  8. package/dist/inference/stt.d.ts.map +1 -1
  9. package/dist/inference/stt.js +1 -1
  10. package/dist/inference/stt.js.map +1 -1
  11. package/dist/inference/tts.cjs +4 -4
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.cts +0 -1
  14. package/dist/inference/tts.d.ts +0 -1
  15. package/dist/inference/tts.d.ts.map +1 -1
  16. package/dist/inference/tts.js +4 -4
  17. package/dist/inference/tts.js.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  19. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  20. package/dist/ipc/job_proc_lazy_main.js +1 -1
  21. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  22. package/dist/job.cjs +29 -2
  23. package/dist/job.cjs.map +1 -1
  24. package/dist/job.d.cts +6 -0
  25. package/dist/job.d.ts +6 -0
  26. package/dist/job.d.ts.map +1 -1
  27. package/dist/job.js +19 -2
  28. package/dist/job.js.map +1 -1
  29. package/dist/llm/llm.cjs +2 -1
  30. package/dist/llm/llm.cjs.map +1 -1
  31. package/dist/llm/llm.d.cts +1 -1
  32. package/dist/llm/llm.d.ts +1 -1
  33. package/dist/llm/llm.d.ts.map +1 -1
  34. package/dist/llm/llm.js +2 -1
  35. package/dist/llm/llm.js.map +1 -1
  36. package/dist/stream/deferred_stream.cjs +12 -4
  37. package/dist/stream/deferred_stream.cjs.map +1 -1
  38. package/dist/stream/deferred_stream.d.cts +6 -1
  39. package/dist/stream/deferred_stream.d.ts +6 -1
  40. package/dist/stream/deferred_stream.d.ts.map +1 -1
  41. package/dist/stream/deferred_stream.js +12 -4
  42. package/dist/stream/deferred_stream.js.map +1 -1
  43. package/dist/stream/deferred_stream.test.cjs +2 -2
  44. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  45. package/dist/stream/deferred_stream.test.js +2 -2
  46. package/dist/stream/deferred_stream.test.js.map +1 -1
  47. package/dist/stt/stream_adapter.cjs +15 -8
  48. package/dist/stt/stream_adapter.cjs.map +1 -1
  49. package/dist/stt/stream_adapter.d.cts +7 -3
  50. package/dist/stt/stream_adapter.d.ts +7 -3
  51. package/dist/stt/stream_adapter.d.ts.map +1 -1
  52. package/dist/stt/stream_adapter.js +15 -8
  53. package/dist/stt/stream_adapter.js.map +1 -1
  54. package/dist/stt/stt.cjs +8 -3
  55. package/dist/stt/stt.cjs.map +1 -1
  56. package/dist/stt/stt.d.cts +9 -3
  57. package/dist/stt/stt.d.ts +9 -3
  58. package/dist/stt/stt.d.ts.map +1 -1
  59. package/dist/stt/stt.js +9 -4
  60. package/dist/stt/stt.js.map +1 -1
  61. package/dist/telemetry/traces.cjs +23 -2
  62. package/dist/telemetry/traces.cjs.map +1 -1
  63. package/dist/telemetry/traces.d.ts.map +1 -1
  64. package/dist/telemetry/traces.js +23 -2
  65. package/dist/telemetry/traces.js.map +1 -1
  66. package/dist/tts/stream_adapter.cjs +10 -7
  67. package/dist/tts/stream_adapter.cjs.map +1 -1
  68. package/dist/tts/stream_adapter.d.cts +6 -3
  69. package/dist/tts/stream_adapter.d.ts +6 -3
  70. package/dist/tts/stream_adapter.d.ts.map +1 -1
  71. package/dist/tts/stream_adapter.js +10 -7
  72. package/dist/tts/stream_adapter.js.map +1 -1
  73. package/dist/tts/tts.cjs +27 -16
  74. package/dist/tts/tts.cjs.map +1 -1
  75. package/dist/tts/tts.d.cts +12 -5
  76. package/dist/tts/tts.d.ts +12 -5
  77. package/dist/tts/tts.d.ts.map +1 -1
  78. package/dist/tts/tts.js +28 -17
  79. package/dist/tts/tts.js.map +1 -1
  80. package/dist/types.cjs +21 -32
  81. package/dist/types.cjs.map +1 -1
  82. package/dist/types.d.cts +41 -10
  83. package/dist/types.d.ts +41 -10
  84. package/dist/types.d.ts.map +1 -1
  85. package/dist/types.js +18 -30
  86. package/dist/types.js.map +1 -1
  87. package/dist/voice/agent.cjs +54 -19
  88. package/dist/voice/agent.cjs.map +1 -1
  89. package/dist/voice/agent.d.ts.map +1 -1
  90. package/dist/voice/agent.js +54 -19
  91. package/dist/voice/agent.js.map +1 -1
  92. package/dist/voice/agent_activity.cjs +0 -3
  93. package/dist/voice/agent_activity.cjs.map +1 -1
  94. package/dist/voice/agent_activity.d.ts.map +1 -1
  95. package/dist/voice/agent_activity.js +0 -3
  96. package/dist/voice/agent_activity.js.map +1 -1
  97. package/dist/voice/agent_session.cjs +107 -27
  98. package/dist/voice/agent_session.cjs.map +1 -1
  99. package/dist/voice/agent_session.d.cts +16 -2
  100. package/dist/voice/agent_session.d.ts +16 -2
  101. package/dist/voice/agent_session.d.ts.map +1 -1
  102. package/dist/voice/agent_session.js +110 -27
  103. package/dist/voice/agent_session.js.map +1 -1
  104. package/dist/voice/events.cjs.map +1 -1
  105. package/dist/voice/events.d.cts +4 -4
  106. package/dist/voice/events.d.ts +4 -4
  107. package/dist/voice/events.d.ts.map +1 -1
  108. package/dist/voice/events.js.map +1 -1
  109. package/dist/voice/generation.cjs +6 -7
  110. package/dist/voice/generation.cjs.map +1 -1
  111. package/dist/voice/generation.d.ts.map +1 -1
  112. package/dist/voice/generation.js +7 -8
  113. package/dist/voice/generation.js.map +1 -1
  114. package/dist/voice/io.cjs +16 -0
  115. package/dist/voice/io.cjs.map +1 -1
  116. package/dist/voice/io.d.cts +8 -0
  117. package/dist/voice/io.d.ts +8 -0
  118. package/dist/voice/io.d.ts.map +1 -1
  119. package/dist/voice/io.js +16 -0
  120. package/dist/voice/io.js.map +1 -1
  121. package/dist/voice/recorder_io/index.cjs +23 -0
  122. package/dist/voice/recorder_io/index.cjs.map +1 -0
  123. package/dist/voice/recorder_io/index.d.cts +2 -0
  124. package/dist/voice/recorder_io/index.d.ts +2 -0
  125. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  126. package/dist/voice/recorder_io/index.js +2 -0
  127. package/dist/voice/recorder_io/index.js.map +1 -0
  128. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  129. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  130. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  131. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  132. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  133. package/dist/voice/recorder_io/recorder_io.js +508 -0
  134. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  135. package/dist/voice/report.cjs +7 -2
  136. package/dist/voice/report.cjs.map +1 -1
  137. package/dist/voice/report.d.cts +11 -1
  138. package/dist/voice/report.d.ts +11 -1
  139. package/dist/voice/report.d.ts.map +1 -1
  140. package/dist/voice/report.js +7 -2
  141. package/dist/voice/report.js.map +1 -1
  142. package/dist/voice/room_io/_input.cjs +2 -1
  143. package/dist/voice/room_io/_input.cjs.map +1 -1
  144. package/dist/voice/room_io/_input.d.ts.map +1 -1
  145. package/dist/voice/room_io/_input.js +2 -1
  146. package/dist/voice/room_io/_input.js.map +1 -1
  147. package/dist/voice/room_io/_output.cjs +8 -7
  148. package/dist/voice/room_io/_output.cjs.map +1 -1
  149. package/dist/voice/room_io/_output.d.cts +2 -1
  150. package/dist/voice/room_io/_output.d.ts +2 -1
  151. package/dist/voice/room_io/_output.d.ts.map +1 -1
  152. package/dist/voice/room_io/_output.js +8 -7
  153. package/dist/voice/room_io/_output.js.map +1 -1
  154. package/dist/worker.cjs +4 -3
  155. package/dist/worker.cjs.map +1 -1
  156. package/dist/worker.js +4 -3
  157. package/dist/worker.js.map +1 -1
  158. package/package.json +1 -1
  159. package/src/inference/llm.ts +0 -1
  160. package/src/inference/stt.ts +1 -2
  161. package/src/inference/tts.ts +5 -4
  162. package/src/ipc/job_proc_lazy_main.ts +1 -1
  163. package/src/job.ts +21 -2
  164. package/src/llm/llm.ts +2 -2
  165. package/src/stream/deferred_stream.test.ts +3 -3
  166. package/src/stream/deferred_stream.ts +22 -5
  167. package/src/stt/stream_adapter.ts +18 -8
  168. package/src/stt/stt.ts +19 -6
  169. package/src/telemetry/traces.ts +25 -3
  170. package/src/tts/stream_adapter.ts +15 -7
  171. package/src/tts/tts.ts +46 -21
  172. package/src/types.ts +57 -33
  173. package/src/voice/agent.ts +59 -19
  174. package/src/voice/agent_activity.ts +0 -3
  175. package/src/voice/agent_session.ts +142 -35
  176. package/src/voice/events.ts +6 -3
  177. package/src/voice/generation.ts +10 -8
  178. package/src/voice/io.ts +19 -0
  179. package/src/voice/recorder_io/index.ts +4 -0
  180. package/src/voice/recorder_io/recorder_io.ts +690 -0
  181. package/src/voice/report.ts +20 -3
  182. package/src/voice/room_io/_input.ts +2 -1
  183. package/src/voice/room_io/_output.ts +10 -7
  184. package/src/worker.ts +1 -1
@@ -260,27 +260,41 @@ export class Agent<UserData = any> {
260
260
  let wrapped_stt = activity.stt;
261
261
 
262
262
  if (!wrapped_stt.capabilities.streaming) {
263
- if (!agent.vad) {
263
+ const vad = agent.vad || activity.vad;
264
+ if (!vad) {
264
265
  throw new Error(
265
266
  'STT does not support streaming, add a VAD to the AgentTask/VoiceAgent to enable streaming',
266
267
  );
267
268
  }
268
- wrapped_stt = new STTStreamAdapter(wrapped_stt, agent.vad);
269
+ wrapped_stt = new STTStreamAdapter(wrapped_stt, vad);
269
270
  }
270
271
 
271
- const stream = wrapped_stt.stream();
272
+ const connOptions = activity.agentSession.connOptions.sttConnOptions;
273
+ const stream = wrapped_stt.stream({ connOptions });
272
274
  stream.updateInputStream(audio);
273
275
 
276
+ let cleaned = false;
277
+ const cleanup = () => {
278
+ if (cleaned) return;
279
+ cleaned = true;
280
+ stream.detachInputStream();
281
+ stream.close();
282
+ };
283
+
274
284
  return new ReadableStream({
275
285
  async start(controller) {
276
- for await (const event of stream) {
277
- controller.enqueue(event);
286
+ try {
287
+ for await (const event of stream) {
288
+ controller.enqueue(event);
289
+ }
290
+ controller.close();
291
+ } finally {
292
+ // Always clean up the STT stream, whether it ends naturally or is cancelled
293
+ cleanup();
278
294
  }
279
- controller.close();
280
295
  },
281
296
  cancel() {
282
- stream.detachInputStream();
283
- stream.close();
297
+ cleanup();
284
298
  },
285
299
  });
286
300
  },
@@ -304,22 +318,36 @@ export class Agent<UserData = any> {
304
318
 
305
319
  // TODO(brian): make parallelToolCalls configurable
306
320
  const { toolChoice } = modelSettings;
321
+ const connOptions = activity.agentSession.connOptions.llmConnOptions;
307
322
 
308
323
  const stream = activity.llm.chat({
309
324
  chatCtx,
310
325
  toolCtx,
311
326
  toolChoice,
327
+ connOptions,
312
328
  parallelToolCalls: true,
313
329
  });
330
+
331
+ let cleaned = false;
332
+ const cleanup = () => {
333
+ if (cleaned) return;
334
+ cleaned = true;
335
+ stream.close();
336
+ };
337
+
314
338
  return new ReadableStream({
315
339
  async start(controller) {
316
- for await (const chunk of stream) {
317
- controller.enqueue(chunk);
340
+ try {
341
+ for await (const chunk of stream) {
342
+ controller.enqueue(chunk);
343
+ }
344
+ controller.close();
345
+ } finally {
346
+ cleanup();
318
347
  }
319
- controller.close();
320
348
  },
321
349
  cancel() {
322
- stream.close();
350
+ cleanup();
323
351
  },
324
352
  });
325
353
  },
@@ -340,21 +368,33 @@ export class Agent<UserData = any> {
340
368
  wrapped_tts = new TTSStreamAdapter(wrapped_tts, new BasicSentenceTokenizer());
341
369
  }
342
370
 
343
- const stream = wrapped_tts.stream();
371
+ const connOptions = activity.agentSession.connOptions.ttsConnOptions;
372
+ const stream = wrapped_tts.stream({ connOptions });
344
373
  stream.updateInputStream(text);
345
374
 
375
+ let cleaned = false;
376
+ const cleanup = () => {
377
+ if (cleaned) return;
378
+ cleaned = true;
379
+ stream.close();
380
+ };
381
+
346
382
  return new ReadableStream({
347
383
  async start(controller) {
348
- for await (const chunk of stream) {
349
- if (chunk === SynthesizeStream.END_OF_STREAM) {
350
- break;
384
+ try {
385
+ for await (const chunk of stream) {
386
+ if (chunk === SynthesizeStream.END_OF_STREAM) {
387
+ break;
388
+ }
389
+ controller.enqueue(chunk.frame);
351
390
  }
352
- controller.enqueue(chunk.frame);
391
+ controller.close();
392
+ } finally {
393
+ cleanup();
353
394
  }
354
- controller.close();
355
395
  },
356
396
  cancel() {
357
- stream.close();
397
+ cleanup();
358
398
  },
359
399
  });
360
400
  },
@@ -2259,15 +2259,12 @@ export class AgentActivity implements RecognitionHooks {
2259
2259
  }
2260
2260
  if (this.stt instanceof STT) {
2261
2261
  this.stt.off('metrics_collected', this.onMetricsCollected);
2262
- await this.stt.close();
2263
2262
  }
2264
2263
  if (this.tts instanceof TTS) {
2265
2264
  this.tts.off('metrics_collected', this.onMetricsCollected);
2266
- await this.tts.close();
2267
2265
  }
2268
2266
  if (this.vad instanceof VAD) {
2269
2267
  this.vad.off('metrics_collected', this.onMetricsCollected);
2270
- await this.vad.close();
2271
2268
  }
2272
2269
 
2273
2270
  this.detachAudioInput();
@@ -15,7 +15,7 @@ import {
15
15
  type STTModelString,
16
16
  type TTSModelString,
17
17
  } from '../inference/index.js';
18
- import { getJobContext } from '../job.js';
18
+ import { type JobContext, getJobContext } from '../job.js';
19
19
  import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
20
20
  import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
21
21
  import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
@@ -25,6 +25,12 @@ import type { STT } from '../stt/index.js';
25
25
  import type { STTError } from '../stt/stt.js';
26
26
  import { traceTypes, tracer } from '../telemetry/index.js';
27
27
  import type { TTS, TTSError } from '../tts/tts.js';
28
+ import {
29
+ DEFAULT_API_CONNECT_OPTIONS,
30
+ DEFAULT_SESSION_CONNECT_OPTIONS,
31
+ type ResolvedSessionConnectOptions,
32
+ type SessionConnectOptions,
33
+ } from '../types.js';
28
34
  import type { VAD } from '../vad.js';
29
35
  import type { Agent } from './agent.js';
30
36
  import { AgentActivity } from './agent_activity.js';
@@ -40,6 +46,7 @@ import {
40
46
  type ErrorEvent,
41
47
  type FunctionToolsExecutedEvent,
42
48
  type MetricsCollectedEvent,
49
+ type ShutdownReason,
43
50
  type SpeechCreatedEvent,
44
51
  type UserInputTranscribedEvent,
45
52
  type UserState,
@@ -50,6 +57,7 @@ import {
50
57
  createUserStateChangedEvent,
51
58
  } from './events.js';
52
59
  import { AgentInput, AgentOutput } from './io.js';
60
+ import { RecorderIO } from './recorder_io/index.js';
53
61
  import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
54
62
  import type { UnknownUserData } from './run_context.js';
55
63
  import type { SpeechHandle } from './speech_handle.js';
@@ -100,6 +108,7 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
100
108
  tts?: TTS | TTSModelString;
101
109
  userData?: UserData;
102
110
  voiceOptions?: Partial<VoiceOptions>;
111
+ connOptions?: SessionConnectOptions;
103
112
  };
104
113
 
105
114
  export class AgentSession<
@@ -132,10 +141,20 @@ export class AgentSession<
132
141
  private closingTask: Promise<void> | null = null;
133
142
  private userAwayTimer: NodeJS.Timeout | null = null;
134
143
 
144
+ // Connection options for STT, LLM, and TTS
145
+ private _connOptions: ResolvedSessionConnectOptions;
146
+
147
+ // Unrecoverable error counts, reset after agent speaking
148
+ private llmErrorCounts = 0;
149
+ private ttsErrorCounts = 0;
150
+
135
151
  private sessionSpan?: Span;
136
152
  private userSpeakingSpan?: Span;
137
153
  private agentSpeakingSpan?: Span;
138
154
 
155
+ /** @internal */
156
+ _recorderIO?: RecorderIO;
157
+
139
158
  /** @internal */
140
159
  rootSpanContext?: Context;
141
160
 
@@ -159,8 +178,19 @@ export class AgentSession<
159
178
  turnDetection,
160
179
  userData,
161
180
  voiceOptions = defaultVoiceOptions,
181
+ connOptions,
162
182
  } = opts;
163
183
 
184
+ // Merge user-provided connOptions with defaults
185
+ this._connOptions = {
186
+ sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions },
187
+ llmConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.llmConnOptions },
188
+ ttsConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.ttsConnOptions },
189
+ maxUnrecoverableErrors:
190
+ connOptions?.maxUnrecoverableErrors ??
191
+ DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors,
192
+ };
193
+
164
194
  this.vad = vad;
165
195
 
166
196
  if (typeof stt === 'string') {
@@ -225,6 +255,11 @@ export class AgentSession<
225
255
  return this._chatCtx;
226
256
  }
227
257
 
258
+ /** Connection options for STT, LLM, and TTS. */
259
+ get connOptions(): ResolvedSessionConnectOptions {
260
+ return this._connOptions;
261
+ }
262
+
228
263
  set userData(value: UserData) {
229
264
  this._userData = value;
230
265
  }
@@ -234,14 +269,12 @@ export class AgentSession<
234
269
  room,
235
270
  inputOptions,
236
271
  outputOptions,
237
- record,
238
272
  span,
239
273
  }: {
240
274
  agent: Agent;
241
275
  room: Room;
242
276
  inputOptions?: Partial<RoomInputOptions>;
243
277
  outputOptions?: Partial<RoomOutputOptions>;
244
- record: boolean;
245
278
  span: Span;
246
279
  }): Promise<void> {
247
280
  span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
@@ -275,20 +308,39 @@ export class AgentSession<
275
308
  });
276
309
  this.roomIO.start();
277
310
 
278
- const ctx = getJobContext();
279
- if (ctx && ctx.room === room && !room.isConnected) {
280
- this.logger.debug('Auto-connecting to room via job context');
281
- tasks.push(ctx.connect());
311
+ let ctx: JobContext | undefined = undefined;
312
+ try {
313
+ ctx = getJobContext();
314
+ } catch (error) {
315
+ // JobContext is not available in evals
316
+ this.logger.warn('JobContext is not available');
282
317
  }
283
318
 
284
- if (record) {
319
+ if (ctx) {
320
+ if (ctx.room === room && !room.isConnected) {
321
+ this.logger.debug('Auto-connecting to room via job context');
322
+ tasks.push(ctx.connect());
323
+ }
324
+
285
325
  if (ctx._primaryAgentSession === undefined) {
286
326
  ctx._primaryAgentSession = this;
287
- } else {
327
+ } else if (this._enableRecording) {
288
328
  throw new Error(
289
- 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',
329
+ 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use `session.start({ record: false })`.',
290
330
  );
291
331
  }
332
+
333
+ if (this.input.audio && this.output.audio && this._enableRecording) {
334
+ this._recorderIO = new RecorderIO({ agentSession: this });
335
+ this.input.audio = this._recorderIO.recordInput(this.input.audio);
336
+ this.output.audio = this._recorderIO.recordOutput(this.output.audio);
337
+
338
+ // Start recording to session directory
339
+ const sessionDir = ctx.sessionDirectory;
340
+ if (sessionDir) {
341
+ tasks.push(this._recorderIO.start(`${sessionDir}/audio.ogg`));
342
+ }
343
+ }
292
344
  }
293
345
 
294
346
  // TODO(AJS-265): add shutdown callback to job context
@@ -327,29 +379,29 @@ export class AgentSession<
327
379
  return;
328
380
  }
329
381
 
330
- const ctx = getJobContext();
382
+ let ctx: JobContext | undefined = undefined;
383
+ try {
384
+ ctx = getJobContext();
331
385
 
332
- this.logger.info(
333
- { record, enableRecording: ctx.info.job.enableRecording },
334
- 'Configuring session recording',
335
- );
386
+ if (record === undefined) {
387
+ record = ctx.job.enableRecording;
388
+ }
336
389
 
337
- record = record ?? ctx.info.job.enableRecording;
338
- this._enableRecording = record;
390
+ this._enableRecording = record;
339
391
 
340
- if (this._enableRecording) {
341
- await ctx.initRecording();
392
+ if (this._enableRecording) {
393
+ ctx.initRecording();
394
+ }
395
+ } catch (error) {
396
+ // JobContext is not available in evals
397
+ this.logger.warn('JobContext is not available');
342
398
  }
343
399
 
344
- // Create agent_session as a ROOT span (new trace) to match Python behavior
345
- // This creates a separate trace for better cloud dashboard organization
346
400
  this.sessionSpan = tracer.startSpan({
347
401
  name: 'agent_session',
348
402
  context: ROOT_CONTEXT,
349
403
  });
350
404
 
351
- // Set the session span as the active span in the context
352
- // This ensures all child spans (agent_turn, user_turn, etc.) are parented to it
353
405
  this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
354
406
 
355
407
  await this._startImpl({
@@ -357,7 +409,6 @@ export class AgentSession<
357
409
  room,
358
410
  inputOptions,
359
411
  outputOptions,
360
- record,
361
412
  span: this.sessionSpan,
362
413
  });
363
414
  }
@@ -397,7 +448,17 @@ export class AgentSession<
397
448
  throw new Error('AgentSession is not running');
398
449
  }
399
450
 
400
- return this.activity.say(text, options);
451
+ const doSay = (activity: AgentActivity) => {
452
+ return activity.say(text, options);
453
+ };
454
+
455
+ // attach to the session span if called outside of the AgentSession
456
+ const activeSpan = trace.getActiveSpan();
457
+ if (!activeSpan && this.rootSpanContext) {
458
+ return otelContext.with(this.rootSpanContext, () => doSay(this.activity!));
459
+ }
460
+
461
+ return doSay(this.activity);
401
462
  }
402
463
 
403
464
  interrupt() {
@@ -424,14 +485,25 @@ export class AgentSession<
424
485
  })
425
486
  : undefined;
426
487
 
427
- if (this.activity.draining) {
428
- if (!this.nextActivity) {
429
- throw new Error('AgentSession is closing, cannot use generateReply()');
488
+ const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
489
+ if (activity.draining) {
490
+ if (!nextActivity) {
491
+ throw new Error('AgentSession is closing, cannot use generateReply()');
492
+ }
493
+ return nextActivity.generateReply({ userMessage, ...options });
430
494
  }
431
- return this.nextActivity.generateReply({ userMessage, ...options });
495
+ return activity.generateReply({ userMessage, ...options });
496
+ };
497
+
498
+ // attach to the session span if called outside of the AgentSession
499
+ const activeSpan = trace.getActiveSpan();
500
+ if (!activeSpan && this.rootSpanContext) {
501
+ return otelContext.with(this.rootSpanContext, () =>
502
+ doGenerateReply(this.activity!, this.nextActivity),
503
+ );
432
504
  }
433
505
 
434
- return this.activity.generateReply({ userMessage, ...options });
506
+ return doGenerateReply(this.activity!, this.nextActivity);
435
507
  }
436
508
 
437
509
  private async updateActivity(agent: Agent): Promise<void> {
@@ -492,13 +564,22 @@ export class AgentSession<
492
564
  await this.closeImpl(CloseReason.USER_INITIATED);
493
565
  }
494
566
 
567
+ shutdown(options?: { drain?: boolean; reason?: ShutdownReason }): void {
568
+ const { drain = true, reason = CloseReason.USER_INITIATED } = options ?? {};
569
+
570
+ this._closeSoon({
571
+ reason,
572
+ drain,
573
+ });
574
+ }
575
+
495
576
  /** @internal */
496
577
  _closeSoon({
497
578
  reason,
498
579
  drain = false,
499
580
  error = null,
500
581
  }: {
501
- reason: CloseReason;
582
+ reason: ShutdownReason;
502
583
  drain?: boolean;
503
584
  error?: RealtimeModelError | STTError | TTSError | LLMError | null;
504
585
  }): void {
@@ -514,6 +595,19 @@ export class AgentSession<
514
595
  return;
515
596
  }
516
597
 
598
+ // Track error counts per type to implement max_unrecoverable_errors logic
599
+ if (error.type === 'llm_error') {
600
+ this.llmErrorCounts += 1;
601
+ if (this.llmErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
602
+ return;
603
+ }
604
+ } else if (error.type === 'tts_error') {
605
+ this.ttsErrorCounts += 1;
606
+ if (this.ttsErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
607
+ return;
608
+ }
609
+ }
610
+
517
611
  this.logger.error(error, 'AgentSession is closing due to unrecoverable error');
518
612
 
519
613
  this.closingTask = (async () => {
@@ -541,7 +635,9 @@ export class AgentSession<
541
635
  }
542
636
 
543
637
  if (state === 'speaking') {
544
- // TODO(brian): PR4 - Track error counts
638
+ // Reset error counts when agent starts speaking
639
+ this.llmErrorCounts = 0;
640
+ this.ttsErrorCounts = 0;
545
641
 
546
642
  if (this.agentSpeakingSpan === undefined) {
547
643
  this.agentSpeakingSpan = tracer.startSpan({
@@ -657,7 +753,7 @@ export class AgentSession<
657
753
  }
658
754
 
659
755
  private async closeImpl(
660
- reason: CloseReason,
756
+ reason: ShutdownReason,
661
757
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
662
758
  drain: boolean = false,
663
759
  ): Promise<void> {
@@ -671,7 +767,7 @@ export class AgentSession<
671
767
  }
672
768
 
673
769
  private async closeImplInner(
674
- reason: CloseReason,
770
+ reason: ShutdownReason,
675
771
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
676
772
  drain: boolean = false,
677
773
  ): Promise<void> {
@@ -694,7 +790,16 @@ export class AgentSession<
694
790
  await this.activity.drain();
695
791
  // wait any uninterruptible speech to finish
696
792
  await this.activity.currentSpeech?.waitForPlayout();
697
- this.activity.detachAudioInput();
793
+ try {
794
+ this.activity.detachAudioInput();
795
+ } catch (error) {
796
+ // Ignore detach errors during cleanup - source may not have been set
797
+ }
798
+ }
799
+
800
+ // Close recorder before detaching inputs/outputs (keep reference for session report)
801
+ if (this._recorderIO) {
802
+ await this._recorderIO.close();
698
803
  }
699
804
 
700
805
  // detach the inputs and outputs
@@ -730,6 +835,8 @@ export class AgentSession<
730
835
  this.userState = 'listening';
731
836
  this._agentState = 'initializing';
732
837
  this.rootSpanContext = undefined;
838
+ this.llmErrorCounts = 0;
839
+ this.ttsErrorCounts = 0;
733
840
 
734
841
  this.logger.info({ reason, error }, 'AgentSession closed');
735
842
  }
@@ -5,9 +5,10 @@ import type {
5
5
  ChatMessage,
6
6
  FunctionCall,
7
7
  FunctionCallOutput,
8
+ LLM,
9
+ RealtimeModel,
8
10
  RealtimeModelError,
9
11
  } from '../llm/index.js';
10
- import type { LLM, RealtimeModel } from '../llm/index.js';
11
12
  import type { LLMError } from '../llm/llm.js';
12
13
  import type { AgentMetrics } from '../metrics/base.js';
13
14
  import type { STT } from '../stt/index.js';
@@ -38,6 +39,8 @@ export enum CloseReason {
38
39
  USER_INITIATED = 'user_initiated',
39
40
  }
40
41
 
42
+ export type ShutdownReason = CloseReason | string;
43
+
41
44
  export type SpeechSource = 'say' | 'generate_reply' | 'tool_response';
42
45
 
43
46
  export type UserStateChangedEvent = {
@@ -231,12 +234,12 @@ export const createErrorEvent = (
231
234
  export type CloseEvent = {
232
235
  type: 'close';
233
236
  error: RealtimeModelError | STTError | TTSError | LLMError | null;
234
- reason: CloseReason;
237
+ reason: ShutdownReason;
235
238
  createdAt: number;
236
239
  };
237
240
 
238
241
  export const createCloseEvent = (
239
- reason: CloseReason,
242
+ reason: ShutdownReason,
240
243
  error: RealtimeModelError | STTError | TTSError | LLMError | null = null,
241
244
  createdAt: number = Date.now(),
242
245
  ): CloseEvent => ({
@@ -24,7 +24,7 @@ import { isZodSchema, parseZodSchema } from '../llm/zod-utils.js';
24
24
  import { log } from '../log.js';
25
25
  import { IdentityTransform } from '../stream/identity_transform.js';
26
26
  import { traceTypes, tracer } from '../telemetry/index.js';
27
- import { Future, Task, shortuuid, toError } from '../utils.js';
27
+ import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
28
28
  import { type Agent, type ModelSettings, asyncLocalStorage, isStopResponse } from './agent.js';
29
29
  import type { AgentSession } from './agent_session.js';
30
30
  import type { AudioOutput, LLMNode, TTSNode, TextOutput } from './io.js';
@@ -411,17 +411,19 @@ export function performLLMInference(
411
411
  return;
412
412
  }
413
413
 
414
+ const abortPromise = waitForAbort(signal);
415
+
414
416
  // TODO(brian): add support for dynamic tools
415
417
 
416
418
  llmStreamReader = llmStream.getReader();
417
419
  while (true) {
418
- if (signal.aborted) {
419
- break;
420
- }
421
- const { done, value: chunk } = await llmStreamReader.read();
422
- if (done) {
423
- break;
424
- }
420
+ if (signal.aborted) break;
421
+
422
+ const result = await Promise.race([llmStreamReader.read(), abortPromise]);
423
+ if (result === undefined) break;
424
+
425
+ const { done, value: chunk } = result;
426
+ if (done) break;
425
427
 
426
428
  if (typeof chunk === 'string') {
427
429
  data.generatedText += chunk;
package/src/voice/io.ts CHANGED
@@ -28,6 +28,7 @@ export type TTSNode = (
28
28
  text: ReadableStream<string>,
29
29
  modelSettings: ModelSettings,
30
30
  ) => Promise<ReadableStream<AudioFrame> | null>;
31
+
31
32
  export abstract class AudioInput {
32
33
  protected deferredStream: DeferredReadableStream<AudioFrame> =
33
34
  new DeferredReadableStream<AudioFrame>();
@@ -128,6 +129,24 @@ export abstract class AudioOutput extends EventEmitter {
128
129
  this.nextInChain.onDetached();
129
130
  }
130
131
  }
132
+
133
+ /**
134
+ * Pause the audio playback
135
+ */
136
+ pause(): void {
137
+ if (this.nextInChain) {
138
+ this.nextInChain.pause();
139
+ }
140
+ }
141
+
142
+ /**
143
+ * Resume the audio playback
144
+ */
145
+ resume(): void {
146
+ if (this.nextInChain) {
147
+ this.nextInChain.resume();
148
+ }
149
+ }
131
150
  }
132
151
 
133
152
  export interface PlaybackFinishedEvent {
@@ -0,0 +1,4 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ export * from './recorder_io.js';