@livekit/agents 1.0.24 → 1.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. package/dist/inference/llm.cjs +1 -2
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.ts.map +1 -1
  4. package/dist/inference/llm.js +1 -2
  5. package/dist/inference/llm.js.map +1 -1
  6. package/dist/inference/stt.cjs +1 -1
  7. package/dist/inference/stt.cjs.map +1 -1
  8. package/dist/inference/stt.d.ts.map +1 -1
  9. package/dist/inference/stt.js +1 -1
  10. package/dist/inference/stt.js.map +1 -1
  11. package/dist/inference/tts.cjs +4 -2
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.ts.map +1 -1
  14. package/dist/inference/tts.js +4 -2
  15. package/dist/inference/tts.js.map +1 -1
  16. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  17. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.js +1 -1
  19. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  20. package/dist/job.cjs +29 -2
  21. package/dist/job.cjs.map +1 -1
  22. package/dist/job.d.cts +6 -0
  23. package/dist/job.d.ts +6 -0
  24. package/dist/job.d.ts.map +1 -1
  25. package/dist/job.js +19 -2
  26. package/dist/job.js.map +1 -1
  27. package/dist/llm/llm.cjs +2 -1
  28. package/dist/llm/llm.cjs.map +1 -1
  29. package/dist/llm/llm.d.cts +1 -1
  30. package/dist/llm/llm.d.ts +1 -1
  31. package/dist/llm/llm.d.ts.map +1 -1
  32. package/dist/llm/llm.js +2 -1
  33. package/dist/llm/llm.js.map +1 -1
  34. package/dist/stream/deferred_stream.cjs +12 -4
  35. package/dist/stream/deferred_stream.cjs.map +1 -1
  36. package/dist/stream/deferred_stream.d.cts +6 -1
  37. package/dist/stream/deferred_stream.d.ts +6 -1
  38. package/dist/stream/deferred_stream.d.ts.map +1 -1
  39. package/dist/stream/deferred_stream.js +12 -4
  40. package/dist/stream/deferred_stream.js.map +1 -1
  41. package/dist/stream/deferred_stream.test.cjs +2 -2
  42. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  43. package/dist/stream/deferred_stream.test.js +2 -2
  44. package/dist/stream/deferred_stream.test.js.map +1 -1
  45. package/dist/stt/stream_adapter.cjs +15 -8
  46. package/dist/stt/stream_adapter.cjs.map +1 -1
  47. package/dist/stt/stream_adapter.d.cts +7 -3
  48. package/dist/stt/stream_adapter.d.ts +7 -3
  49. package/dist/stt/stream_adapter.d.ts.map +1 -1
  50. package/dist/stt/stream_adapter.js +15 -8
  51. package/dist/stt/stream_adapter.js.map +1 -1
  52. package/dist/stt/stt.cjs +8 -3
  53. package/dist/stt/stt.cjs.map +1 -1
  54. package/dist/stt/stt.d.cts +9 -3
  55. package/dist/stt/stt.d.ts +9 -3
  56. package/dist/stt/stt.d.ts.map +1 -1
  57. package/dist/stt/stt.js +9 -4
  58. package/dist/stt/stt.js.map +1 -1
  59. package/dist/telemetry/traces.cjs +23 -2
  60. package/dist/telemetry/traces.cjs.map +1 -1
  61. package/dist/telemetry/traces.d.ts.map +1 -1
  62. package/dist/telemetry/traces.js +23 -2
  63. package/dist/telemetry/traces.js.map +1 -1
  64. package/dist/tts/stream_adapter.cjs +4 -4
  65. package/dist/tts/stream_adapter.cjs.map +1 -1
  66. package/dist/tts/stream_adapter.d.cts +5 -2
  67. package/dist/tts/stream_adapter.d.ts +5 -2
  68. package/dist/tts/stream_adapter.d.ts.map +1 -1
  69. package/dist/tts/stream_adapter.js +4 -4
  70. package/dist/tts/stream_adapter.js.map +1 -1
  71. package/dist/tts/tts.cjs +2 -2
  72. package/dist/tts/tts.cjs.map +1 -1
  73. package/dist/tts/tts.d.cts +5 -1
  74. package/dist/tts/tts.d.ts +5 -1
  75. package/dist/tts/tts.d.ts.map +1 -1
  76. package/dist/tts/tts.js +3 -3
  77. package/dist/tts/tts.js.map +1 -1
  78. package/dist/types.cjs +21 -32
  79. package/dist/types.cjs.map +1 -1
  80. package/dist/types.d.cts +41 -10
  81. package/dist/types.d.ts +41 -10
  82. package/dist/types.d.ts.map +1 -1
  83. package/dist/types.js +18 -30
  84. package/dist/types.js.map +1 -1
  85. package/dist/voice/agent.cjs +54 -19
  86. package/dist/voice/agent.cjs.map +1 -1
  87. package/dist/voice/agent.d.ts.map +1 -1
  88. package/dist/voice/agent.js +54 -19
  89. package/dist/voice/agent.js.map +1 -1
  90. package/dist/voice/agent_activity.cjs +0 -3
  91. package/dist/voice/agent_activity.cjs.map +1 -1
  92. package/dist/voice/agent_activity.d.ts.map +1 -1
  93. package/dist/voice/agent_activity.js +0 -3
  94. package/dist/voice/agent_activity.js.map +1 -1
  95. package/dist/voice/agent_session.cjs +105 -27
  96. package/dist/voice/agent_session.cjs.map +1 -1
  97. package/dist/voice/agent_session.d.cts +16 -2
  98. package/dist/voice/agent_session.d.ts +16 -2
  99. package/dist/voice/agent_session.d.ts.map +1 -1
  100. package/dist/voice/agent_session.js +108 -27
  101. package/dist/voice/agent_session.js.map +1 -1
  102. package/dist/voice/events.cjs.map +1 -1
  103. package/dist/voice/events.d.cts +4 -4
  104. package/dist/voice/events.d.ts +4 -4
  105. package/dist/voice/events.d.ts.map +1 -1
  106. package/dist/voice/events.js.map +1 -1
  107. package/dist/voice/generation.cjs +6 -7
  108. package/dist/voice/generation.cjs.map +1 -1
  109. package/dist/voice/generation.d.ts.map +1 -1
  110. package/dist/voice/generation.js +7 -8
  111. package/dist/voice/generation.js.map +1 -1
  112. package/dist/voice/io.cjs +16 -0
  113. package/dist/voice/io.cjs.map +1 -1
  114. package/dist/voice/io.d.cts +8 -0
  115. package/dist/voice/io.d.ts +8 -0
  116. package/dist/voice/io.d.ts.map +1 -1
  117. package/dist/voice/io.js +16 -0
  118. package/dist/voice/io.js.map +1 -1
  119. package/dist/voice/recorder_io/index.cjs +23 -0
  120. package/dist/voice/recorder_io/index.cjs.map +1 -0
  121. package/dist/voice/recorder_io/index.d.cts +2 -0
  122. package/dist/voice/recorder_io/index.d.ts +2 -0
  123. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  124. package/dist/voice/recorder_io/index.js +2 -0
  125. package/dist/voice/recorder_io/index.js.map +1 -0
  126. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  127. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  128. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  129. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  130. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  131. package/dist/voice/recorder_io/recorder_io.js +508 -0
  132. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  133. package/dist/voice/report.cjs +7 -2
  134. package/dist/voice/report.cjs.map +1 -1
  135. package/dist/voice/report.d.cts +11 -1
  136. package/dist/voice/report.d.ts +11 -1
  137. package/dist/voice/report.d.ts.map +1 -1
  138. package/dist/voice/report.js +7 -2
  139. package/dist/voice/report.js.map +1 -1
  140. package/dist/voice/room_io/_input.cjs +2 -1
  141. package/dist/voice/room_io/_input.cjs.map +1 -1
  142. package/dist/voice/room_io/_input.d.ts.map +1 -1
  143. package/dist/voice/room_io/_input.js +2 -1
  144. package/dist/voice/room_io/_input.js.map +1 -1
  145. package/dist/voice/room_io/_output.cjs +8 -7
  146. package/dist/voice/room_io/_output.cjs.map +1 -1
  147. package/dist/voice/room_io/_output.d.cts +2 -1
  148. package/dist/voice/room_io/_output.d.ts +2 -1
  149. package/dist/voice/room_io/_output.d.ts.map +1 -1
  150. package/dist/voice/room_io/_output.js +8 -7
  151. package/dist/voice/room_io/_output.js.map +1 -1
  152. package/dist/worker.cjs +4 -3
  153. package/dist/worker.cjs.map +1 -1
  154. package/dist/worker.js +4 -3
  155. package/dist/worker.js.map +1 -1
  156. package/package.json +1 -1
  157. package/src/inference/llm.ts +0 -1
  158. package/src/inference/stt.ts +1 -2
  159. package/src/inference/tts.ts +5 -2
  160. package/src/ipc/job_proc_lazy_main.ts +1 -1
  161. package/src/job.ts +21 -2
  162. package/src/llm/llm.ts +2 -2
  163. package/src/stream/deferred_stream.test.ts +3 -3
  164. package/src/stream/deferred_stream.ts +22 -5
  165. package/src/stt/stream_adapter.ts +18 -8
  166. package/src/stt/stt.ts +19 -6
  167. package/src/telemetry/traces.ts +25 -3
  168. package/src/tts/stream_adapter.ts +5 -4
  169. package/src/tts/tts.ts +6 -4
  170. package/src/types.ts +57 -33
  171. package/src/voice/agent.ts +59 -19
  172. package/src/voice/agent_activity.ts +0 -3
  173. package/src/voice/agent_session.ts +140 -35
  174. package/src/voice/events.ts +6 -3
  175. package/src/voice/generation.ts +10 -8
  176. package/src/voice/io.ts +19 -0
  177. package/src/voice/recorder_io/index.ts +4 -0
  178. package/src/voice/recorder_io/recorder_io.ts +690 -0
  179. package/src/voice/report.ts +20 -3
  180. package/src/voice/room_io/_input.ts +2 -1
  181. package/src/voice/room_io/_output.ts +10 -7
  182. package/src/worker.ts +1 -1
package/src/voice/agent_session.ts CHANGED
@@ -15,7 +15,7 @@ import {
15
15
  type STTModelString,
16
16
  type TTSModelString,
17
17
  } from '../inference/index.js';
18
- import { getJobContext } from '../job.js';
18
+ import { type JobContext, getJobContext } from '../job.js';
19
19
  import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
20
20
  import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
21
21
  import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
@@ -25,6 +25,12 @@ import type { STT } from '../stt/index.js';
25
25
  import type { STTError } from '../stt/stt.js';
26
26
  import { traceTypes, tracer } from '../telemetry/index.js';
27
27
  import type { TTS, TTSError } from '../tts/tts.js';
28
+ import {
29
+ DEFAULT_API_CONNECT_OPTIONS,
30
+ DEFAULT_SESSION_CONNECT_OPTIONS,
31
+ type ResolvedSessionConnectOptions,
32
+ type SessionConnectOptions,
33
+ } from '../types.js';
28
34
  import type { VAD } from '../vad.js';
29
35
  import type { Agent } from './agent.js';
30
36
  import { AgentActivity } from './agent_activity.js';
@@ -40,6 +46,7 @@ import {
40
46
  type ErrorEvent,
41
47
  type FunctionToolsExecutedEvent,
42
48
  type MetricsCollectedEvent,
49
+ type ShutdownReason,
43
50
  type SpeechCreatedEvent,
44
51
  type UserInputTranscribedEvent,
45
52
  type UserState,
@@ -50,6 +57,7 @@ import {
50
57
  createUserStateChangedEvent,
51
58
  } from './events.js';
52
59
  import { AgentInput, AgentOutput } from './io.js';
60
+ import { RecorderIO } from './recorder_io/index.js';
53
61
  import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
54
62
  import type { UnknownUserData } from './run_context.js';
55
63
  import type { SpeechHandle } from './speech_handle.js';
@@ -100,6 +108,7 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
100
108
  tts?: TTS | TTSModelString;
101
109
  userData?: UserData;
102
110
  voiceOptions?: Partial<VoiceOptions>;
111
+ connOptions?: SessionConnectOptions;
103
112
  };
104
113
 
105
114
  export class AgentSession<
@@ -132,10 +141,20 @@ export class AgentSession<
132
141
  private closingTask: Promise<void> | null = null;
133
142
  private userAwayTimer: NodeJS.Timeout | null = null;
134
143
 
144
+ // Connection options for STT, LLM, and TTS
145
+ private _connOptions: ResolvedSessionConnectOptions;
146
+
147
+ // Unrecoverable error counts, reset after agent speaking
148
+ private llmErrorCounts = 0;
149
+ private ttsErrorCounts = 0;
150
+
135
151
  private sessionSpan?: Span;
136
152
  private userSpeakingSpan?: Span;
137
153
  private agentSpeakingSpan?: Span;
138
154
 
155
+ /** @internal */
156
+ _recorderIO?: RecorderIO;
157
+
139
158
  /** @internal */
140
159
  rootSpanContext?: Context;
141
160
 
@@ -159,8 +178,19 @@ export class AgentSession<
159
178
  turnDetection,
160
179
  userData,
161
180
  voiceOptions = defaultVoiceOptions,
181
+ connOptions,
162
182
  } = opts;
163
183
 
184
+ // Merge user-provided connOptions with defaults
185
+ this._connOptions = {
186
+ sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions },
187
+ llmConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.llmConnOptions },
188
+ ttsConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.ttsConnOptions },
189
+ maxUnrecoverableErrors:
190
+ connOptions?.maxUnrecoverableErrors ??
191
+ DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors,
192
+ };
193
+
164
194
  this.vad = vad;
165
195
 
166
196
  if (typeof stt === 'string') {
@@ -225,6 +255,11 @@ export class AgentSession<
225
255
  return this._chatCtx;
226
256
  }
227
257
 
258
+ /** Connection options for STT, LLM, and TTS. */
259
+ get connOptions(): ResolvedSessionConnectOptions {
260
+ return this._connOptions;
261
+ }
262
+
228
263
  set userData(value: UserData) {
229
264
  this._userData = value;
230
265
  }
@@ -234,14 +269,12 @@ export class AgentSession<
234
269
  room,
235
270
  inputOptions,
236
271
  outputOptions,
237
- record,
238
272
  span,
239
273
  }: {
240
274
  agent: Agent;
241
275
  room: Room;
242
276
  inputOptions?: Partial<RoomInputOptions>;
243
277
  outputOptions?: Partial<RoomOutputOptions>;
244
- record: boolean;
245
278
  span: Span;
246
279
  }): Promise<void> {
247
280
  span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
@@ -275,20 +308,38 @@ export class AgentSession<
275
308
  });
276
309
  this.roomIO.start();
277
310
 
278
- const ctx = getJobContext();
279
- if (ctx && ctx.room === room && !room.isConnected) {
280
- this.logger.debug('Auto-connecting to room via job context');
281
- tasks.push(ctx.connect());
311
+ let ctx: JobContext | undefined = undefined;
312
+ try {
313
+ ctx = getJobContext();
314
+ } catch (error) {
315
+ // JobContext is not available in evals
282
316
  }
283
317
 
284
- if (record) {
318
+ if (ctx) {
319
+ if (ctx.room === room && !room.isConnected) {
320
+ this.logger.debug('Auto-connecting to room via job context');
321
+ tasks.push(ctx.connect());
322
+ }
323
+
285
324
  if (ctx._primaryAgentSession === undefined) {
286
325
  ctx._primaryAgentSession = this;
287
- } else {
326
+ } else if (this._enableRecording) {
288
327
  throw new Error(
289
- 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',
328
+ 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use `session.start({ record: false })`.',
290
329
  );
291
330
  }
331
+
332
+ if (this.input.audio && this.output.audio && this._enableRecording) {
333
+ this._recorderIO = new RecorderIO({ agentSession: this });
334
+ this.input.audio = this._recorderIO.recordInput(this.input.audio);
335
+ this.output.audio = this._recorderIO.recordOutput(this.output.audio);
336
+
337
+ // Start recording to session directory
338
+ const sessionDir = ctx.sessionDirectory;
339
+ if (sessionDir) {
340
+ tasks.push(this._recorderIO.start(`${sessionDir}/audio.ogg`));
341
+ }
342
+ }
292
343
  }
293
344
 
294
345
  // TODO(AJS-265): add shutdown callback to job context
@@ -327,29 +378,28 @@ export class AgentSession<
327
378
  return;
328
379
  }
329
380
 
330
- const ctx = getJobContext();
381
+ let ctx: JobContext | undefined = undefined;
382
+ try {
383
+ ctx = getJobContext();
331
384
 
332
- this.logger.info(
333
- { record, enableRecording: ctx.info.job.enableRecording },
334
- 'Configuring session recording',
335
- );
385
+ if (record === undefined) {
386
+ record = ctx.job.enableRecording;
387
+ }
336
388
 
337
- record = record ?? ctx.info.job.enableRecording;
338
- this._enableRecording = record;
389
+ this._enableRecording = record;
339
390
 
340
- if (this._enableRecording) {
341
- await ctx.initRecording();
391
+ if (this._enableRecording) {
392
+ ctx.initRecording();
393
+ }
394
+ } catch (error) {
395
+ // JobContext is not available in evals
342
396
  }
343
397
 
344
- // Create agent_session as a ROOT span (new trace) to match Python behavior
345
- // This creates a separate trace for better cloud dashboard organization
346
398
  this.sessionSpan = tracer.startSpan({
347
399
  name: 'agent_session',
348
400
  context: ROOT_CONTEXT,
349
401
  });
350
402
 
351
- // Set the session span as the active span in the context
352
- // This ensures all child spans (agent_turn, user_turn, etc.) are parented to it
353
403
  this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
354
404
 
355
405
  await this._startImpl({
@@ -357,7 +407,6 @@ export class AgentSession<
357
407
  room,
358
408
  inputOptions,
359
409
  outputOptions,
360
- record,
361
410
  span: this.sessionSpan,
362
411
  });
363
412
  }
@@ -397,7 +446,17 @@ export class AgentSession<
397
446
  throw new Error('AgentSession is not running');
398
447
  }
399
448
 
400
- return this.activity.say(text, options);
449
+ const doSay = (activity: AgentActivity) => {
450
+ return activity.say(text, options);
451
+ };
452
+
453
+ // attach to the session span if called outside of the AgentSession
454
+ const activeSpan = trace.getActiveSpan();
455
+ if (!activeSpan && this.rootSpanContext) {
456
+ return otelContext.with(this.rootSpanContext, () => doSay(this.activity!));
457
+ }
458
+
459
+ return doSay(this.activity);
401
460
  }
402
461
 
403
462
  interrupt() {
@@ -424,14 +483,25 @@ export class AgentSession<
424
483
  })
425
484
  : undefined;
426
485
 
427
- if (this.activity.draining) {
428
- if (!this.nextActivity) {
429
- throw new Error('AgentSession is closing, cannot use generateReply()');
486
+ const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
487
+ if (activity.draining) {
488
+ if (!nextActivity) {
489
+ throw new Error('AgentSession is closing, cannot use generateReply()');
490
+ }
491
+ return nextActivity.generateReply({ userMessage, ...options });
430
492
  }
431
- return this.nextActivity.generateReply({ userMessage, ...options });
493
+ return activity.generateReply({ userMessage, ...options });
494
+ };
495
+
496
+ // attach to the session span if called outside of the AgentSession
497
+ const activeSpan = trace.getActiveSpan();
498
+ if (!activeSpan && this.rootSpanContext) {
499
+ return otelContext.with(this.rootSpanContext, () =>
500
+ doGenerateReply(this.activity!, this.nextActivity),
501
+ );
432
502
  }
433
503
 
434
- return this.activity.generateReply({ userMessage, ...options });
504
+ return doGenerateReply(this.activity!, this.nextActivity);
435
505
  }
436
506
 
437
507
  private async updateActivity(agent: Agent): Promise<void> {
@@ -492,13 +562,22 @@ export class AgentSession<
492
562
  await this.closeImpl(CloseReason.USER_INITIATED);
493
563
  }
494
564
 
565
+ shutdown(options?: { drain?: boolean; reason?: ShutdownReason }): void {
566
+ const { drain = true, reason = CloseReason.USER_INITIATED } = options ?? {};
567
+
568
+ this._closeSoon({
569
+ reason,
570
+ drain,
571
+ });
572
+ }
573
+
495
574
  /** @internal */
496
575
  _closeSoon({
497
576
  reason,
498
577
  drain = false,
499
578
  error = null,
500
579
  }: {
501
- reason: CloseReason;
580
+ reason: ShutdownReason;
502
581
  drain?: boolean;
503
582
  error?: RealtimeModelError | STTError | TTSError | LLMError | null;
504
583
  }): void {
@@ -514,6 +593,19 @@ export class AgentSession<
514
593
  return;
515
594
  }
516
595
 
596
+ // Track error counts per type to implement max_unrecoverable_errors logic
597
+ if (error.type === 'llm_error') {
598
+ this.llmErrorCounts += 1;
599
+ if (this.llmErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
600
+ return;
601
+ }
602
+ } else if (error.type === 'tts_error') {
603
+ this.ttsErrorCounts += 1;
604
+ if (this.ttsErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
605
+ return;
606
+ }
607
+ }
608
+
517
609
  this.logger.error(error, 'AgentSession is closing due to unrecoverable error');
518
610
 
519
611
  this.closingTask = (async () => {
@@ -541,7 +633,9 @@ export class AgentSession<
541
633
  }
542
634
 
543
635
  if (state === 'speaking') {
544
- // TODO(brian): PR4 - Track error counts
636
+ // Reset error counts when agent starts speaking
637
+ this.llmErrorCounts = 0;
638
+ this.ttsErrorCounts = 0;
545
639
 
546
640
  if (this.agentSpeakingSpan === undefined) {
547
641
  this.agentSpeakingSpan = tracer.startSpan({
@@ -657,7 +751,7 @@ export class AgentSession<
657
751
  }
658
752
 
659
753
  private async closeImpl(
660
- reason: CloseReason,
754
+ reason: ShutdownReason,
661
755
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
662
756
  drain: boolean = false,
663
757
  ): Promise<void> {
@@ -671,7 +765,7 @@ export class AgentSession<
671
765
  }
672
766
 
673
767
  private async closeImplInner(
674
- reason: CloseReason,
768
+ reason: ShutdownReason,
675
769
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
676
770
  drain: boolean = false,
677
771
  ): Promise<void> {
@@ -694,7 +788,16 @@ export class AgentSession<
694
788
  await this.activity.drain();
695
789
  // wait any uninterruptible speech to finish
696
790
  await this.activity.currentSpeech?.waitForPlayout();
697
- this.activity.detachAudioInput();
791
+ try {
792
+ this.activity.detachAudioInput();
793
+ } catch (error) {
794
+ // Ignore detach errors during cleanup - source may not have been set
795
+ }
796
+ }
797
+
798
+ // Close recorder before detaching inputs/outputs (keep reference for session report)
799
+ if (this._recorderIO) {
800
+ await this._recorderIO.close();
698
801
  }
699
802
 
700
803
  // detach the inputs and outputs
@@ -730,6 +833,8 @@ export class AgentSession<
730
833
  this.userState = 'listening';
731
834
  this._agentState = 'initializing';
732
835
  this.rootSpanContext = undefined;
836
+ this.llmErrorCounts = 0;
837
+ this.ttsErrorCounts = 0;
733
838
 
734
839
  this.logger.info({ reason, error }, 'AgentSession closed');
735
840
  }
package/src/voice/events.ts CHANGED
@@ -5,9 +5,10 @@ import type {
5
5
  ChatMessage,
6
6
  FunctionCall,
7
7
  FunctionCallOutput,
8
+ LLM,
9
+ RealtimeModel,
8
10
  RealtimeModelError,
9
11
  } from '../llm/index.js';
10
- import type { LLM, RealtimeModel } from '../llm/index.js';
11
12
  import type { LLMError } from '../llm/llm.js';
12
13
  import type { AgentMetrics } from '../metrics/base.js';
13
14
  import type { STT } from '../stt/index.js';
@@ -38,6 +39,8 @@ export enum CloseReason {
38
39
  USER_INITIATED = 'user_initiated',
39
40
  }
40
41
 
42
+ export type ShutdownReason = CloseReason | string;
43
+
41
44
  export type SpeechSource = 'say' | 'generate_reply' | 'tool_response';
42
45
 
43
46
  export type UserStateChangedEvent = {
@@ -231,12 +234,12 @@ export const createErrorEvent = (
231
234
  export type CloseEvent = {
232
235
  type: 'close';
233
236
  error: RealtimeModelError | STTError | TTSError | LLMError | null;
234
- reason: CloseReason;
237
+ reason: ShutdownReason;
235
238
  createdAt: number;
236
239
  };
237
240
 
238
241
  export const createCloseEvent = (
239
- reason: CloseReason,
242
+ reason: ShutdownReason,
240
243
  error: RealtimeModelError | STTError | TTSError | LLMError | null = null,
241
244
  createdAt: number = Date.now(),
242
245
  ): CloseEvent => ({
package/src/voice/generation.ts CHANGED
@@ -24,7 +24,7 @@ import { isZodSchema, parseZodSchema } from '../llm/zod-utils.js';
24
24
  import { log } from '../log.js';
25
25
  import { IdentityTransform } from '../stream/identity_transform.js';
26
26
  import { traceTypes, tracer } from '../telemetry/index.js';
27
- import { Future, Task, shortuuid, toError } from '../utils.js';
27
+ import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
28
28
  import { type Agent, type ModelSettings, asyncLocalStorage, isStopResponse } from './agent.js';
29
29
  import type { AgentSession } from './agent_session.js';
30
30
  import type { AudioOutput, LLMNode, TTSNode, TextOutput } from './io.js';
@@ -411,17 +411,19 @@ export function performLLMInference(
411
411
  return;
412
412
  }
413
413
 
414
+ const abortPromise = waitForAbort(signal);
415
+
414
416
  // TODO(brian): add support for dynamic tools
415
417
 
416
418
  llmStreamReader = llmStream.getReader();
417
419
  while (true) {
418
- if (signal.aborted) {
419
- break;
420
- }
421
- const { done, value: chunk } = await llmStreamReader.read();
422
- if (done) {
423
- break;
424
- }
420
+ if (signal.aborted) break;
421
+
422
+ const result = await Promise.race([llmStreamReader.read(), abortPromise]);
423
+ if (result === undefined) break;
424
+
425
+ const { done, value: chunk } = result;
426
+ if (done) break;
425
427
 
426
428
  if (typeof chunk === 'string') {
427
429
  data.generatedText += chunk;
package/src/voice/io.ts CHANGED
@@ -28,6 +28,7 @@ export type TTSNode = (
28
28
  text: ReadableStream<string>,
29
29
  modelSettings: ModelSettings,
30
30
  ) => Promise<ReadableStream<AudioFrame> | null>;
31
+
31
32
  export abstract class AudioInput {
32
33
  protected deferredStream: DeferredReadableStream<AudioFrame> =
33
34
  new DeferredReadableStream<AudioFrame>();
@@ -128,6 +129,24 @@ export abstract class AudioOutput extends EventEmitter {
128
129
  this.nextInChain.onDetached();
129
130
  }
130
131
  }
132
+
133
+ /**
134
+ * Pause the audio playback
135
+ */
136
+ pause(): void {
137
+ if (this.nextInChain) {
138
+ this.nextInChain.pause();
139
+ }
140
+ }
141
+
142
+ /**
143
+ * Resume the audio playback
144
+ */
145
+ resume(): void {
146
+ if (this.nextInChain) {
147
+ this.nextInChain.resume();
148
+ }
149
+ }
131
150
  }
132
151
 
133
152
  export interface PlaybackFinishedEvent {
package/src/voice/recorder_io/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ export * from './recorder_io.js';