@livekit/agents 1.0.46 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. package/dist/cli.cjs +14 -20
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +14 -20
  5. package/dist/cli.js.map +1 -1
  6. package/dist/ipc/job_proc_lazy_main.cjs +14 -5
  7. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  8. package/dist/ipc/job_proc_lazy_main.js +14 -5
  9. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  10. package/dist/llm/chat_context.cjs +19 -0
  11. package/dist/llm/chat_context.cjs.map +1 -1
  12. package/dist/llm/chat_context.d.cts +4 -0
  13. package/dist/llm/chat_context.d.ts +4 -0
  14. package/dist/llm/chat_context.d.ts.map +1 -1
  15. package/dist/llm/chat_context.js +19 -0
  16. package/dist/llm/chat_context.js.map +1 -1
  17. package/dist/llm/provider_format/index.cjs +2 -0
  18. package/dist/llm/provider_format/index.cjs.map +1 -1
  19. package/dist/llm/provider_format/index.d.cts +1 -1
  20. package/dist/llm/provider_format/index.d.ts +1 -1
  21. package/dist/llm/provider_format/index.d.ts.map +1 -1
  22. package/dist/llm/provider_format/index.js +6 -1
  23. package/dist/llm/provider_format/index.js.map +1 -1
  24. package/dist/llm/provider_format/openai.cjs +82 -2
  25. package/dist/llm/provider_format/openai.cjs.map +1 -1
  26. package/dist/llm/provider_format/openai.d.cts +1 -0
  27. package/dist/llm/provider_format/openai.d.ts +1 -0
  28. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  29. package/dist/llm/provider_format/openai.js +80 -1
  30. package/dist/llm/provider_format/openai.js.map +1 -1
  31. package/dist/llm/provider_format/openai.test.cjs +326 -0
  32. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  33. package/dist/llm/provider_format/openai.test.js +327 -1
  34. package/dist/llm/provider_format/openai.test.js.map +1 -1
  35. package/dist/llm/provider_format/utils.cjs +4 -3
  36. package/dist/llm/provider_format/utils.cjs.map +1 -1
  37. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  38. package/dist/llm/provider_format/utils.js +4 -3
  39. package/dist/llm/provider_format/utils.js.map +1 -1
  40. package/dist/llm/realtime.cjs.map +1 -1
  41. package/dist/llm/realtime.d.cts +1 -0
  42. package/dist/llm/realtime.d.ts +1 -0
  43. package/dist/llm/realtime.d.ts.map +1 -1
  44. package/dist/llm/realtime.js.map +1 -1
  45. package/dist/log.cjs +5 -2
  46. package/dist/log.cjs.map +1 -1
  47. package/dist/log.d.ts.map +1 -1
  48. package/dist/log.js +5 -2
  49. package/dist/log.js.map +1 -1
  50. package/dist/stream/deferred_stream.cjs +15 -6
  51. package/dist/stream/deferred_stream.cjs.map +1 -1
  52. package/dist/stream/deferred_stream.d.ts.map +1 -1
  53. package/dist/stream/deferred_stream.js +15 -6
  54. package/dist/stream/deferred_stream.js.map +1 -1
  55. package/dist/utils.cjs +31 -2
  56. package/dist/utils.cjs.map +1 -1
  57. package/dist/utils.d.cts +7 -0
  58. package/dist/utils.d.ts +7 -0
  59. package/dist/utils.d.ts.map +1 -1
  60. package/dist/utils.js +31 -2
  61. package/dist/utils.js.map +1 -1
  62. package/dist/utils.test.cjs +71 -0
  63. package/dist/utils.test.cjs.map +1 -1
  64. package/dist/utils.test.js +71 -0
  65. package/dist/utils.test.js.map +1 -1
  66. package/dist/version.cjs +1 -1
  67. package/dist/version.cjs.map +1 -1
  68. package/dist/version.d.cts +1 -1
  69. package/dist/version.d.ts +1 -1
  70. package/dist/version.d.ts.map +1 -1
  71. package/dist/version.js +1 -1
  72. package/dist/version.js.map +1 -1
  73. package/dist/voice/agent.cjs +144 -12
  74. package/dist/voice/agent.cjs.map +1 -1
  75. package/dist/voice/agent.d.cts +29 -4
  76. package/dist/voice/agent.d.ts +29 -4
  77. package/dist/voice/agent.d.ts.map +1 -1
  78. package/dist/voice/agent.js +140 -11
  79. package/dist/voice/agent.js.map +1 -1
  80. package/dist/voice/agent.test.cjs +120 -0
  81. package/dist/voice/agent.test.cjs.map +1 -1
  82. package/dist/voice/agent.test.js +122 -2
  83. package/dist/voice/agent.test.js.map +1 -1
  84. package/dist/voice/agent_activity.cjs +383 -298
  85. package/dist/voice/agent_activity.cjs.map +1 -1
  86. package/dist/voice/agent_activity.d.cts +34 -7
  87. package/dist/voice/agent_activity.d.ts +34 -7
  88. package/dist/voice/agent_activity.d.ts.map +1 -1
  89. package/dist/voice/agent_activity.js +383 -293
  90. package/dist/voice/agent_activity.js.map +1 -1
  91. package/dist/voice/agent_session.cjs +140 -40
  92. package/dist/voice/agent_session.cjs.map +1 -1
  93. package/dist/voice/agent_session.d.cts +19 -7
  94. package/dist/voice/agent_session.d.ts +19 -7
  95. package/dist/voice/agent_session.d.ts.map +1 -1
  96. package/dist/voice/agent_session.js +137 -37
  97. package/dist/voice/agent_session.js.map +1 -1
  98. package/dist/voice/audio_recognition.cjs +4 -0
  99. package/dist/voice/audio_recognition.cjs.map +1 -1
  100. package/dist/voice/audio_recognition.d.ts.map +1 -1
  101. package/dist/voice/audio_recognition.js +4 -0
  102. package/dist/voice/audio_recognition.js.map +1 -1
  103. package/dist/voice/generation.cjs +39 -19
  104. package/dist/voice/generation.cjs.map +1 -1
  105. package/dist/voice/generation.d.ts.map +1 -1
  106. package/dist/voice/generation.js +44 -20
  107. package/dist/voice/generation.js.map +1 -1
  108. package/dist/voice/index.cjs +2 -0
  109. package/dist/voice/index.cjs.map +1 -1
  110. package/dist/voice/index.d.cts +1 -1
  111. package/dist/voice/index.d.ts +1 -1
  112. package/dist/voice/index.d.ts.map +1 -1
  113. package/dist/voice/index.js +2 -1
  114. package/dist/voice/index.js.map +1 -1
  115. package/dist/voice/speech_handle.cjs +7 -1
  116. package/dist/voice/speech_handle.cjs.map +1 -1
  117. package/dist/voice/speech_handle.d.cts +2 -0
  118. package/dist/voice/speech_handle.d.ts +2 -0
  119. package/dist/voice/speech_handle.d.ts.map +1 -1
  120. package/dist/voice/speech_handle.js +8 -2
  121. package/dist/voice/speech_handle.js.map +1 -1
  122. package/dist/voice/testing/run_result.cjs +66 -15
  123. package/dist/voice/testing/run_result.cjs.map +1 -1
  124. package/dist/voice/testing/run_result.d.cts +14 -3
  125. package/dist/voice/testing/run_result.d.ts +14 -3
  126. package/dist/voice/testing/run_result.d.ts.map +1 -1
  127. package/dist/voice/testing/run_result.js +66 -15
  128. package/dist/voice/testing/run_result.js.map +1 -1
  129. package/package.json +1 -1
  130. package/src/cli.ts +20 -33
  131. package/src/ipc/job_proc_lazy_main.ts +16 -5
  132. package/src/llm/chat_context.ts +35 -0
  133. package/src/llm/provider_format/index.ts +7 -2
  134. package/src/llm/provider_format/openai.test.ts +385 -1
  135. package/src/llm/provider_format/openai.ts +103 -0
  136. package/src/llm/provider_format/utils.ts +6 -4
  137. package/src/llm/realtime.ts +1 -0
  138. package/src/log.ts +5 -2
  139. package/src/stream/deferred_stream.ts +17 -6
  140. package/src/utils.test.ts +87 -0
  141. package/src/utils.ts +36 -2
  142. package/src/version.ts +1 -1
  143. package/src/voice/agent.test.ts +140 -2
  144. package/src/voice/agent.ts +189 -10
  145. package/src/voice/agent_activity.ts +427 -289
  146. package/src/voice/agent_session.ts +178 -40
  147. package/src/voice/audio_recognition.ts +4 -0
  148. package/src/voice/generation.ts +52 -23
  149. package/src/voice/index.ts +1 -1
  150. package/src/voice/speech_handle.ts +9 -2
  151. package/src/voice/testing/run_result.ts +81 -23
@@ -1,12 +1,14 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import { Mutex } from '@livekit/mutex';
4
5
  import type { AudioFrame, Room } from '@livekit/rtc-node';
5
6
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
6
7
  import type { Context, Span } from '@opentelemetry/api';
7
8
  import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
8
9
  import { EventEmitter } from 'node:events';
9
10
  import type { ReadableStream } from 'node:stream/web';
11
+ import type { z } from 'zod';
10
12
  import {
11
13
  LLM as InferenceLLM,
12
14
  STT as InferenceSTT,
@@ -31,6 +33,7 @@ import {
31
33
  type ResolvedSessionConnectOptions,
32
34
  type SessionConnectOptions,
33
35
  } from '../types.js';
36
+ import { Task } from '../utils.js';
34
37
  import type { VAD } from '../vad.js';
35
38
  import type { Agent } from './agent.js';
36
39
  import { AgentActivity } from './agent_activity.js';
@@ -115,6 +118,13 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
115
118
  connOptions?: SessionConnectOptions;
116
119
  };
117
120
 
121
+ type ActivityTransitionOptions = {
122
+ previousActivity?: 'close' | 'pause';
123
+ newActivity?: 'start' | 'resume';
124
+ blockedTasks?: Task<any>[];
125
+ waitOnEnter?: boolean;
126
+ };
127
+
118
128
  export class AgentSession<
119
129
  UserData = UnknownUserData,
120
130
  > extends (EventEmitter as new () => TypedEmitter<AgentSessionCallbacks>) {
@@ -129,8 +139,10 @@ export class AgentSession<
129
139
  private agent?: Agent;
130
140
  private activity?: AgentActivity;
131
141
  private nextActivity?: AgentActivity;
142
+ private updateActivityTask?: Task<void>;
132
143
  private started = false;
133
144
  private userState: UserState = 'listening';
145
+ private readonly activityLock = new Mutex();
134
146
 
135
147
  /** @internal */
136
148
  _roomIO?: RoomIO;
@@ -360,7 +372,8 @@ export class AgentSession<
360
372
  }
361
373
 
362
374
  // TODO(AJS-265): add shutdown callback to job context
363
- tasks.push(this.updateActivity(this.agent));
375
+ // Initial start does not wait on onEnter
376
+ tasks.push(this._updateActivity(this.agent, { waitOnEnter: false }));
364
377
 
365
378
  await Promise.allSettled(tasks);
366
379
 
@@ -432,8 +445,34 @@ export class AgentSession<
432
445
  updateAgent(agent: Agent): void {
433
446
  this.agent = agent;
434
447
 
435
- if (this.started) {
436
- this.updateActivity(agent);
448
+ if (!this.started) {
449
+ return;
450
+ }
451
+
452
+ const _updateActivityTask = async (oldTask: Task<void> | undefined, agent: Agent) => {
453
+ if (oldTask) {
454
+ try {
455
+ await oldTask.result;
456
+ } catch (error) {
457
+ this.logger.error(error, 'previous updateAgent transition failed');
458
+ }
459
+ }
460
+
461
+ await this._updateActivity(agent);
462
+ };
463
+
464
+ const oldTask = this.updateActivityTask;
465
+ this.updateActivityTask = Task.from(
466
+ async () => _updateActivityTask(oldTask, agent),
467
+ undefined,
468
+ 'AgentSession_updateActivityTask',
469
+ );
470
+
471
+ const runState = this._globalRunState;
472
+ if (runState) {
473
+ // Don't mark the RunResult as done, if there is currently an agent transition happening.
474
+ // (used to make sure we're correctly adding the AgentHandoffResult before completion)
475
+ runState._watchHandle(this.updateActivityTask);
437
476
  }
438
477
  }
439
478
 
@@ -464,24 +503,42 @@ export class AgentSession<
464
503
  throw new Error('AgentSession is not running');
465
504
  }
466
505
 
467
- const doSay = (activity: AgentActivity) => {
506
+ const doSay = (activity: AgentActivity, nextActivity?: AgentActivity) => {
507
+ if (activity.schedulingPaused) {
508
+ if (!nextActivity) {
509
+ throw new Error('AgentSession is closing, cannot use say()');
510
+ }
511
+ return nextActivity.say(text, options);
512
+ }
468
513
  return activity.say(text, options);
469
514
  };
470
515
 
516
+ const runState = this._globalRunState;
517
+ let handle: SpeechHandle;
518
+
471
519
  // attach to the session span if called outside of the AgentSession
472
520
  const activeSpan = trace.getActiveSpan();
473
521
  if (!activeSpan && this.rootSpanContext) {
474
- return otelContext.with(this.rootSpanContext, () => doSay(this.activity!));
522
+ handle = otelContext.with(this.rootSpanContext, () =>
523
+ doSay(this.activity!, this.nextActivity),
524
+ );
525
+ } else {
526
+ handle = doSay(this.activity, this.nextActivity);
527
+ }
528
+
529
+ if (runState) {
530
+ runState._watchHandle(handle);
475
531
  }
476
532
 
477
- return doSay(this.activity);
533
+ return handle;
478
534
  }
479
535
 
480
- interrupt() {
536
+ interrupt(options?: { force?: boolean }) {
481
537
  if (!this.activity) {
482
538
  throw new Error('AgentSession is not running');
483
539
  }
484
- return this.activity.interrupt();
540
+
541
+ return this.activity.interrupt(options);
485
542
  }
486
543
 
487
544
  generateReply(options?: {
@@ -502,7 +559,7 @@ export class AgentSession<
502
559
  : undefined;
503
560
 
504
561
  const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
505
- if (activity.draining) {
562
+ if (activity.schedulingPaused) {
506
563
  if (!nextActivity) {
507
564
  throw new Error('AgentSession is closing, cannot use generateReply()');
508
565
  }
@@ -542,53 +599,128 @@ export class AgentSession<
542
599
  * result.expect.noMoreEvents();
543
600
  * ```
544
601
  *
545
- * @param options - Run options including user input
602
+ * @param options - Run options including user input and optional output type
546
603
  * @returns A RunResult that resolves when the agent finishes responding
547
- *
548
- * TODO: Add outputType parameter for typed outputs (parity with Python)
549
604
  */
550
- run(options: { userInput: string }): RunResult {
605
+ run<T = unknown>({
606
+ userInput,
607
+ outputType,
608
+ }: {
609
+ userInput: string;
610
+ outputType?: z.ZodType<T>;
611
+ }): RunResult<T> {
551
612
  if (this._globalRunState && !this._globalRunState.done()) {
552
613
  throw new Error('nested runs are not supported');
553
614
  }
554
615
 
555
- const runState = new RunResult({ userInput: options.userInput });
616
+ const runState = new RunResult<T>({
617
+ userInput,
618
+ outputType,
619
+ });
620
+
556
621
  this._globalRunState = runState;
557
- this.generateReply({ userInput: options.userInput });
622
+
623
+ // Defer generateReply through the activityLock to ensure any in-progress
624
+ // activity transition (e.g. AgentTask started from onEnter) completes first.
625
+ // TS Task.from starts onEnter synchronously, so the transition may already be
626
+ // mid-flight by the time run() is called after session.start() resolves.
627
+ // Acquiring and immediately releasing the lock guarantees FIFO ordering:
628
+ // the transition's lock section finishes before we route generateReply.
629
+ (async () => {
630
+ try {
631
+ const unlock = await this.activityLock.lock();
632
+ unlock();
633
+ this.generateReply({ userInput });
634
+ } catch (e) {
635
+ runState._reject(e instanceof Error ? e : new Error(String(e)));
636
+ }
637
+ })();
558
638
 
559
639
  return runState;
560
640
  }
561
641
 
562
- private async updateActivity(agent: Agent): Promise<void> {
642
+ /** @internal */
643
+ async _updateActivity(agent: Agent, options: ActivityTransitionOptions = {}): Promise<void> {
644
+ const { previousActivity = 'close', newActivity = 'start', blockedTasks = [] } = options;
645
+ const waitOnEnter = options.waitOnEnter ?? newActivity === 'start';
646
+
563
647
  const runWithContext = async () => {
564
- // TODO(AJS-129): add lock to agent activity core lifecycle
565
- this.nextActivity = new AgentActivity(agent, this);
648
+ const unlock = await this.activityLock.lock();
649
+ let onEnterTask: Task<void> | undefined;
566
650
 
567
- const previousActivity = this.activity;
651
+ try {
652
+ this.agent = agent;
653
+ const prevActivityObj = this.activity;
654
+
655
+ if (newActivity === 'start') {
656
+ const prevAgent = prevActivityObj?.agent;
657
+ if (
658
+ agent._agentActivity &&
659
+ // allow updating the same agent that is running
660
+ (agent !== prevAgent || previousActivity !== 'close')
661
+ ) {
662
+ throw new Error('Cannot start agent: an activity is already running');
663
+ }
664
+ this.nextActivity = new AgentActivity(agent, this);
665
+ } else if (newActivity === 'resume') {
666
+ if (!agent._agentActivity) {
667
+ throw new Error('Cannot resume agent: no existing activity to resume');
668
+ }
669
+ this.nextActivity = agent._agentActivity;
670
+ }
568
671
 
569
- if (this.activity) {
570
- await this.activity.drain();
571
- await this.activity.close();
572
- }
672
+ if (prevActivityObj && prevActivityObj !== this.nextActivity) {
673
+ if (previousActivity === 'pause') {
674
+ await prevActivityObj.pause({ blockedTasks });
675
+ } else {
676
+ await prevActivityObj.drain();
677
+ await prevActivityObj.close();
678
+ }
679
+ }
573
680
 
574
- this.activity = this.nextActivity;
575
- this.nextActivity = undefined;
681
+ this.activity = this.nextActivity;
682
+ this.nextActivity = undefined;
576
683
 
577
- this._chatCtx.insert(
578
- new AgentHandoffItem({
579
- oldAgentId: previousActivity?.agent.id,
684
+ const runState = this._globalRunState;
685
+ const handoffItem = new AgentHandoffItem({
686
+ oldAgentId: prevActivityObj?.agent.id,
580
687
  newAgentId: agent.id,
581
- }),
582
- );
583
- this.logger.debug(
584
- { previousAgentId: previousActivity?.agent.id, newAgentId: agent.id },
585
- 'Agent handoff inserted into chat context',
586
- );
688
+ });
587
689
 
588
- await this.activity.start();
690
+ if (runState) {
691
+ runState._agentHandoff({
692
+ item: handoffItem,
693
+ oldAgent: prevActivityObj?.agent,
694
+ newAgent: this.activity!.agent,
695
+ });
696
+ }
697
+
698
+ this._chatCtx.insert(handoffItem);
699
+ this.logger.debug(
700
+ { previousAgentId: prevActivityObj?.agent.id, newAgentId: agent.id },
701
+ 'Agent handoff inserted into chat context',
702
+ );
703
+
704
+ if (newActivity === 'start') {
705
+ await this.activity!.start();
706
+ } else {
707
+ await this.activity!.resume();
708
+ }
709
+
710
+ onEnterTask = this.activity!._onEnterTask;
711
+
712
+ if (this._input.audio) {
713
+ this.activity!.attachAudioInput(this._input.audio.stream);
714
+ }
715
+ } finally {
716
+ unlock();
717
+ }
589
718
 
590
- if (this._input.audio) {
591
- this.activity.attachAudioInput(this._input.audio.stream);
719
+ if (waitOnEnter) {
720
+ if (!onEnterTask) {
721
+ throw new Error('expected onEnter task to be available while waitOnEnter=true');
722
+ }
723
+ await onEnterTask.result;
592
724
  }
593
725
  };
594
726
 
@@ -842,15 +974,21 @@ export class AgentSession<
842
974
  if (this.activity) {
843
975
  if (!drain) {
844
976
  try {
845
- this.activity.interrupt();
977
+ await this.activity.interrupt({ force: true }).await;
846
978
  } catch (error) {
847
- // TODO(shubhra): force interrupt or wait for it to finish?
848
- // it might be an audio played from the error callback
979
+ // Uninterruptible speech can throw during forced interruption.
980
+ this.logger.warn({ error }, 'Error interrupting activity');
849
981
  }
850
982
  }
983
+
851
984
  await this.activity.drain();
852
985
  // wait any uninterruptible speech to finish
853
986
  await this.activity.currentSpeech?.waitForPlayout();
987
+
988
+ if (reason !== CloseReason.ERROR) {
989
+ this.activity.commitUserTurn({ audioDetached: true, throwIfNotReady: false });
990
+ }
991
+
854
992
  try {
855
993
  this.activity.detachAudioInput();
856
994
  } catch (error) {
@@ -768,6 +768,10 @@ export class AudioRecognition {
768
768
  this.logger.debug('User turn committed');
769
769
  })
770
770
  .catch((err: unknown) => {
771
+ if (err instanceof Error && err.name === 'AbortError') {
772
+ this.logger.debug('User turn commit task cancelled');
773
+ return;
774
+ }
771
775
  this.logger.error(err, 'Error in user turn commit task:');
772
776
  });
773
777
  }
@@ -26,7 +26,13 @@ import { IdentityTransform } from '../stream/identity_transform.js';
26
26
  import { traceTypes, tracer } from '../telemetry/index.js';
27
27
  import { USERDATA_TIMED_TRANSCRIPT } from '../types.js';
28
28
  import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
29
- import { type Agent, type ModelSettings, asyncLocalStorage, isStopResponse } from './agent.js';
29
+ import {
30
+ type Agent,
31
+ type ModelSettings,
32
+ _setActivityTaskInfo,
33
+ functionCallStorage,
34
+ isStopResponse,
35
+ } from './agent.js';
30
36
  import type { AgentSession } from './agent_session.js';
31
37
  import {
32
38
  AudioOutput,
@@ -719,7 +725,7 @@ export interface _AudioOut {
719
725
 
720
726
  async function forwardAudio(
721
727
  ttsStream: ReadableStream<AudioFrame>,
722
- audioOuput: AudioOutput,
728
+ audioOutput: AudioOutput,
723
729
  out: _AudioOut,
724
730
  signal?: AbortSignal,
725
731
  ): Promise<void> {
@@ -733,8 +739,8 @@ async function forwardAudio(
733
739
  };
734
740
 
735
741
  try {
736
- audioOuput.on(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
737
- audioOuput.resume();
742
+ audioOutput.on(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
743
+ audioOutput.resume();
738
744
 
739
745
  while (true) {
740
746
  if (signal?.aborted) {
@@ -748,36 +754,36 @@ async function forwardAudio(
748
754
 
749
755
  if (
750
756
  !out.firstFrameFut.done &&
751
- audioOuput.sampleRate &&
752
- audioOuput.sampleRate !== frame.sampleRate &&
757
+ audioOutput.sampleRate &&
758
+ audioOutput.sampleRate !== frame.sampleRate &&
753
759
  !resampler
754
760
  ) {
755
- resampler = new AudioResampler(frame.sampleRate, audioOuput.sampleRate, 1);
761
+ resampler = new AudioResampler(frame.sampleRate, audioOutput.sampleRate, 1);
756
762
  }
757
763
 
758
764
  if (resampler) {
759
765
  for (const f of resampler.push(frame)) {
760
- await audioOuput.captureFrame(f);
766
+ await audioOutput.captureFrame(f);
761
767
  }
762
768
  } else {
763
- await audioOuput.captureFrame(frame);
769
+ await audioOutput.captureFrame(frame);
764
770
  }
765
771
  }
766
772
 
767
773
  if (resampler) {
768
774
  for (const f of resampler.flush()) {
769
- await audioOuput.captureFrame(f);
775
+ await audioOutput.captureFrame(f);
770
776
  }
771
777
  }
772
778
  } finally {
773
- audioOuput.off(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
779
+ audioOutput.off(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
774
780
 
775
781
  if (!out.firstFrameFut.done) {
776
782
  out.firstFrameFut.reject(new Error('audio forwarding cancelled before playback started'));
777
783
  }
778
784
 
779
785
  reader?.releaseLock();
780
- audioOuput.flush();
786
+ audioOutput.flush();
781
787
  }
782
788
  }
783
789
 
@@ -836,7 +842,7 @@ export function performToolExecutions({
836
842
  const signal = controller.signal;
837
843
  const reader = toolCallStream.getReader();
838
844
 
839
- const tasks: Promise<any>[] = [];
845
+ const tasks: Task<void>[] = [];
840
846
  while (!signal.aborted) {
841
847
  const { done, value: toolCall } = await reader.read();
842
848
  if (signal.aborted) break;
@@ -929,14 +935,6 @@ export function performToolExecutions({
929
935
  'Executing LLM tool call',
930
936
  );
931
937
 
932
- const toolExecution = asyncLocalStorage.run({ functionCall: toolCall }, async () => {
933
- return await tool.execute(parsedArgs, {
934
- ctx: new RunContext(session, speechHandle, toolCall),
935
- toolCallId: toolCall.callId,
936
- abortSignal: signal,
937
- });
938
- });
939
-
940
938
  const _tracableToolExecutionImpl = async (toolExecTask: Promise<unknown>, span: Span) => {
941
939
  span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_NAME, toolCall.name);
942
940
  span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_ARGS, toolCall.args);
@@ -993,11 +991,42 @@ export function performToolExecutions({
993
991
  name: 'function_tool',
994
992
  });
995
993
 
994
+ const toolTask = Task.from(
995
+ async () => {
996
+ // Ensure this task is marked inline before user tool code executes.
997
+ const currentTask = Task.current();
998
+ if (currentTask) {
999
+ _setActivityTaskInfo(currentTask, {
1000
+ speechHandle,
1001
+ functionCall: toolCall,
1002
+ inlineTask: true,
1003
+ });
1004
+ }
1005
+
1006
+ const toolExecution = functionCallStorage.run({ functionCall: toolCall }, async () => {
1007
+ return await tool.execute(parsedArgs, {
1008
+ ctx: new RunContext(session, speechHandle, toolCall),
1009
+ toolCallId: toolCall.callId,
1010
+ abortSignal: signal,
1011
+ });
1012
+ });
1013
+
1014
+ await tracableToolExecution(toolExecution);
1015
+ },
1016
+ controller,
1017
+ `performToolExecution:${toolCall.name}`,
1018
+ );
1019
+
1020
+ _setActivityTaskInfo(toolTask, {
1021
+ speechHandle,
1022
+ functionCall: toolCall,
1023
+ inlineTask: true,
1024
+ });
996
1025
  // wait, not cancelling all tool calling tasks
997
- tasks.push(tracableToolExecution(toolExecution));
1026
+ tasks.push(toolTask);
998
1027
  }
999
1028
 
1000
- await Promise.allSettled(tasks);
1029
+ await Promise.allSettled(tasks.map((task) => task.result));
1001
1030
  if (toolOutput.output.length > 0) {
1002
1031
  logger.debug(
1003
1032
  {
@@ -1,7 +1,7 @@
1
1
  // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- export { Agent, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
4
+ export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
5
5
  export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js';
6
6
  export * from './avatar/index.js';
7
7
  export * from './background_audio.js';
@@ -5,7 +5,7 @@ import type { Context } from '@opentelemetry/api';
5
5
  import type { ChatItem } from '../llm/index.js';
6
6
  import type { Task } from '../utils.js';
7
7
  import { Event, Future, shortuuid } from '../utils.js';
8
- import { asyncLocalStorage } from './agent.js';
8
+ import { functionCallStorage } from './agent.js';
9
9
 
10
10
  /** Symbol used to identify SpeechHandle instances */
11
11
  const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
@@ -46,6 +46,9 @@ export class SpeechHandle {
46
46
  /** @internal - OpenTelemetry context for the agent turn span */
47
47
  _agentTurnContext?: Context;
48
48
 
49
+ /** @internal - used by AgentTask/RunResult final output plumbing */
50
+ _maybeRunFinalOutput?: unknown;
51
+
49
52
  private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
50
53
  private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
51
54
 
@@ -148,7 +151,7 @@ export class SpeechHandle {
148
151
  * has entirely played out, including any tool calls and response follow-ups.
149
152
  */
150
153
  async waitForPlayout(): Promise<void> {
151
- const store = asyncLocalStorage.getStore();
154
+ const store = functionCallStorage.getStore();
152
155
  if (store && store?.functionCall) {
153
156
  throw new Error(
154
157
  `Cannot call 'SpeechHandle.waitForPlayout()' from inside the function tool '${store.functionCall.name}'. ` +
@@ -167,6 +170,10 @@ export class SpeechHandle {
167
170
  }
168
171
 
169
172
  addDoneCallback(callback: (sh: SpeechHandle) => void) {
173
+ if (this.done()) {
174
+ queueMicrotask(() => callback(this));
175
+ return;
176
+ }
170
177
  this.doneCallbacks.add(callback);
171
178
  }
172
179