npm - @livekit/agents - Versions diffs - 1.0.45 → 1.0.47 - Mend

@livekit/agents 1.0.45 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (225)

package/dist/cli.cjs +14 -20
package/dist/cli.cjs.map +1 -1
package/dist/cli.d.ts.map +1 -1
package/dist/cli.js +14 -20
package/dist/cli.js.map +1 -1
package/dist/ipc/job_proc_lazy_main.cjs +14 -5
package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
package/dist/ipc/job_proc_lazy_main.js +14 -5
package/dist/ipc/job_proc_lazy_main.js.map +1 -1
package/dist/llm/chat_context.cjs +19 -0
package/dist/llm/chat_context.cjs.map +1 -1
package/dist/llm/chat_context.d.cts +4 -0
package/dist/llm/chat_context.d.ts +4 -0
package/dist/llm/chat_context.d.ts.map +1 -1
package/dist/llm/chat_context.js +19 -0
package/dist/llm/chat_context.js.map +1 -1
package/dist/llm/provider_format/index.cjs +2 -0
package/dist/llm/provider_format/index.cjs.map +1 -1
package/dist/llm/provider_format/index.d.cts +1 -1
package/dist/llm/provider_format/index.d.ts +1 -1
package/dist/llm/provider_format/index.d.ts.map +1 -1
package/dist/llm/provider_format/index.js +6 -1
package/dist/llm/provider_format/index.js.map +1 -1
package/dist/llm/provider_format/openai.cjs +82 -2
package/dist/llm/provider_format/openai.cjs.map +1 -1
package/dist/llm/provider_format/openai.d.cts +1 -0
package/dist/llm/provider_format/openai.d.ts +1 -0
package/dist/llm/provider_format/openai.d.ts.map +1 -1
package/dist/llm/provider_format/openai.js +80 -1
package/dist/llm/provider_format/openai.js.map +1 -1
package/dist/llm/provider_format/openai.test.cjs +326 -0
package/dist/llm/provider_format/openai.test.cjs.map +1 -1
package/dist/llm/provider_format/openai.test.js +327 -1
package/dist/llm/provider_format/openai.test.js.map +1 -1
package/dist/llm/provider_format/utils.cjs +4 -3
package/dist/llm/provider_format/utils.cjs.map +1 -1
package/dist/llm/provider_format/utils.d.ts.map +1 -1
package/dist/llm/provider_format/utils.js +4 -3
package/dist/llm/provider_format/utils.js.map +1 -1
package/dist/llm/realtime.cjs.map +1 -1
package/dist/llm/realtime.d.cts +1 -0
package/dist/llm/realtime.d.ts +1 -0
package/dist/llm/realtime.d.ts.map +1 -1
package/dist/llm/realtime.js.map +1 -1
package/dist/log.cjs +5 -2
package/dist/log.cjs.map +1 -1
package/dist/log.d.ts.map +1 -1
package/dist/log.js +5 -2
package/dist/log.js.map +1 -1
package/dist/stream/deferred_stream.cjs +15 -6
package/dist/stream/deferred_stream.cjs.map +1 -1
package/dist/stream/deferred_stream.d.ts.map +1 -1
package/dist/stream/deferred_stream.js +15 -6
package/dist/stream/deferred_stream.js.map +1 -1
package/dist/stream/index.cjs +3 -0
package/dist/stream/index.cjs.map +1 -1
package/dist/stream/index.d.cts +1 -0
package/dist/stream/index.d.ts +1 -0
package/dist/stream/index.d.ts.map +1 -1
package/dist/stream/index.js +2 -0
package/dist/stream/index.js.map +1 -1
package/dist/stream/multi_input_stream.cjs +139 -0
package/dist/stream/multi_input_stream.cjs.map +1 -0
package/dist/stream/multi_input_stream.d.cts +55 -0
package/dist/stream/multi_input_stream.d.ts +55 -0
package/dist/stream/multi_input_stream.d.ts.map +1 -0
package/dist/stream/multi_input_stream.js +115 -0
package/dist/stream/multi_input_stream.js.map +1 -0
package/dist/stream/multi_input_stream.test.cjs +340 -0
package/dist/stream/multi_input_stream.test.cjs.map +1 -0
package/dist/stream/multi_input_stream.test.js +339 -0
package/dist/stream/multi_input_stream.test.js.map +1 -0
package/dist/telemetry/trace_types.cjs +42 -0
package/dist/telemetry/trace_types.cjs.map +1 -1
package/dist/telemetry/trace_types.d.cts +14 -0
package/dist/telemetry/trace_types.d.ts +14 -0
package/dist/telemetry/trace_types.d.ts.map +1 -1
package/dist/telemetry/trace_types.js +28 -0
package/dist/telemetry/trace_types.js.map +1 -1
package/dist/utils.cjs +44 -2
package/dist/utils.cjs.map +1 -1
package/dist/utils.d.cts +8 -0
package/dist/utils.d.ts +8 -0
package/dist/utils.d.ts.map +1 -1
package/dist/utils.js +44 -2
package/dist/utils.js.map +1 -1
package/dist/utils.test.cjs +71 -0
package/dist/utils.test.cjs.map +1 -1
package/dist/utils.test.js +71 -0
package/dist/utils.test.js.map +1 -1
package/dist/version.cjs +1 -1
package/dist/version.cjs.map +1 -1
package/dist/version.d.cts +1 -1
package/dist/version.d.ts +1 -1
package/dist/version.d.ts.map +1 -1
package/dist/version.js +1 -1
package/dist/version.js.map +1 -1
package/dist/voice/agent.cjs +144 -12
package/dist/voice/agent.cjs.map +1 -1
package/dist/voice/agent.d.cts +29 -4
package/dist/voice/agent.d.ts +29 -4
package/dist/voice/agent.d.ts.map +1 -1
package/dist/voice/agent.js +140 -11
package/dist/voice/agent.js.map +1 -1
package/dist/voice/agent.test.cjs +120 -0
package/dist/voice/agent.test.cjs.map +1 -1
package/dist/voice/agent.test.js +122 -2
package/dist/voice/agent.test.js.map +1 -1
package/dist/voice/agent_activity.cjs +402 -292
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.cts +35 -7
package/dist/voice/agent_activity.d.ts +35 -7
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +402 -287
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +156 -44
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.cts +22 -9
package/dist/voice/agent_session.d.ts +22 -9
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +156 -44
package/dist/voice/agent_session.js.map +1 -1
package/dist/voice/audio_recognition.cjs +89 -36
package/dist/voice/audio_recognition.cjs.map +1 -1
package/dist/voice/audio_recognition.d.cts +22 -1
package/dist/voice/audio_recognition.d.ts +22 -1
package/dist/voice/audio_recognition.d.ts.map +1 -1
package/dist/voice/audio_recognition.js +93 -36
package/dist/voice/audio_recognition.js.map +1 -1
package/dist/voice/audio_recognition_span.test.cjs +233 -0
package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
package/dist/voice/audio_recognition_span.test.js +232 -0
package/dist/voice/audio_recognition_span.test.js.map +1 -0
package/dist/voice/generation.cjs +39 -19
package/dist/voice/generation.cjs.map +1 -1
package/dist/voice/generation.d.ts.map +1 -1
package/dist/voice/generation.js +44 -20
package/dist/voice/generation.js.map +1 -1
package/dist/voice/index.cjs +2 -0
package/dist/voice/index.cjs.map +1 -1
package/dist/voice/index.d.cts +1 -1
package/dist/voice/index.d.ts +1 -1
package/dist/voice/index.d.ts.map +1 -1
package/dist/voice/index.js +2 -1
package/dist/voice/index.js.map +1 -1
package/dist/voice/io.cjs +6 -3
package/dist/voice/io.cjs.map +1 -1
package/dist/voice/io.d.cts +3 -2
package/dist/voice/io.d.ts +3 -2
package/dist/voice/io.d.ts.map +1 -1
package/dist/voice/io.js +6 -3
package/dist/voice/io.js.map +1 -1
package/dist/voice/recorder_io/recorder_io.cjs +3 -1
package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
package/dist/voice/recorder_io/recorder_io.js +3 -1
package/dist/voice/recorder_io/recorder_io.js.map +1 -1
package/dist/voice/room_io/_input.cjs +17 -17
package/dist/voice/room_io/_input.cjs.map +1 -1
package/dist/voice/room_io/_input.d.cts +2 -2
package/dist/voice/room_io/_input.d.ts +2 -2
package/dist/voice/room_io/_input.d.ts.map +1 -1
package/dist/voice/room_io/_input.js +7 -6
package/dist/voice/room_io/_input.js.map +1 -1
package/dist/voice/room_io/room_io.cjs +9 -0
package/dist/voice/room_io/room_io.cjs.map +1 -1
package/dist/voice/room_io/room_io.d.cts +3 -1
package/dist/voice/room_io/room_io.d.ts +3 -1
package/dist/voice/room_io/room_io.d.ts.map +1 -1
package/dist/voice/room_io/room_io.js +9 -0
package/dist/voice/room_io/room_io.js.map +1 -1
package/dist/voice/speech_handle.cjs +7 -1
package/dist/voice/speech_handle.cjs.map +1 -1
package/dist/voice/speech_handle.d.cts +2 -0
package/dist/voice/speech_handle.d.ts +2 -0
package/dist/voice/speech_handle.d.ts.map +1 -1
package/dist/voice/speech_handle.js +8 -2
package/dist/voice/speech_handle.js.map +1 -1
package/dist/voice/testing/run_result.cjs +66 -15
package/dist/voice/testing/run_result.cjs.map +1 -1
package/dist/voice/testing/run_result.d.cts +14 -3
package/dist/voice/testing/run_result.d.ts +14 -3
package/dist/voice/testing/run_result.d.ts.map +1 -1
package/dist/voice/testing/run_result.js +66 -15
package/dist/voice/testing/run_result.js.map +1 -1
package/dist/voice/utils.cjs +47 -0
package/dist/voice/utils.cjs.map +1 -0
package/dist/voice/utils.d.cts +4 -0
package/dist/voice/utils.d.ts +4 -0
package/dist/voice/utils.d.ts.map +1 -0
package/dist/voice/utils.js +23 -0
package/dist/voice/utils.js.map +1 -0
package/package.json +1 -1
package/src/cli.ts +20 -33
package/src/ipc/job_proc_lazy_main.ts +16 -5
package/src/llm/chat_context.ts +35 -0
package/src/llm/provider_format/index.ts +7 -2
package/src/llm/provider_format/openai.test.ts +385 -1
package/src/llm/provider_format/openai.ts +103 -0
package/src/llm/provider_format/utils.ts +6 -4
package/src/llm/realtime.ts +1 -0
package/src/log.ts +5 -2
package/src/stream/deferred_stream.ts +17 -6
package/src/stream/index.ts +1 -0
package/src/stream/multi_input_stream.test.ts +540 -0
package/src/stream/multi_input_stream.ts +172 -0
package/src/telemetry/trace_types.ts +18 -0
package/src/utils.test.ts +87 -0
package/src/utils.ts +52 -2
package/src/version.ts +1 -1
package/src/voice/agent.test.ts +140 -2
package/src/voice/agent.ts +189 -10
package/src/voice/agent_activity.ts +449 -286
package/src/voice/agent_session.ts +195 -51
package/src/voice/audio_recognition.ts +118 -38
package/src/voice/audio_recognition_span.test.ts +261 -0
package/src/voice/generation.ts +52 -23
package/src/voice/index.ts +1 -1
package/src/voice/io.ts +7 -4
package/src/voice/recorder_io/recorder_io.ts +2 -1
package/src/voice/room_io/_input.ts +11 -7
package/src/voice/room_io/room_io.ts +12 -0
package/src/voice/speech_handle.ts +9 -2
package/src/voice/testing/run_result.ts +81 -23
package/src/voice/utils.ts +29 -0

package/src/voice/audio_recognition_span.test.ts ADDED

@@ -0,0 +1,261 @@
+// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { ParticipantKind } from '@livekit/rtc-node';
+import { InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
+import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
+import { describe, expect, it, vi } from 'vitest';
+import { initializeLogger } from '../log.js';
+import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
+import { setTracerProvider } from '../telemetry/index.js';
+import { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js';
+import { AudioRecognition, type _TurnDetector } from './audio_recognition.js';
+function setupInMemoryTracing() {
+  const exporter = new InMemorySpanExporter();
+  const provider = new NodeTracerProvider();
+  provider.addSpanProcessor(new SimpleSpanProcessor(exporter));
+  provider.register();
+  setTracerProvider(provider);
+  return { exporter };
+}
+function spanByName(spans: any[], name: string) {
+  return spans.find((s) => s.name === name);
+}
+class FakeVADStream extends (Object as unknown as { new (): VADStream }) {
+  // We intentionally avoid extending the real VADStream (it is not exported as a value in JS output
+  // in some bundling contexts). Instead we emulate the async iterator shape used by AudioRecognition.
+  private events: VADEvent[];
+  private idx = 0;
+  constructor(events: VADEvent[]) {
+    super();
+    this.events = events;
+  }
+  updateInputStream() {}
+  detachInputStream() {}
+  close() {}
+  [Symbol.asyncIterator]() {
+    return this;
+  }
+  async next(): Promise<IteratorResult<VADEvent>> {
+    if (this.idx >= this.events.length) {
+      return { done: true, value: undefined };
+    }
+    const value = this.events[this.idx++]!;
+    return { done: false, value };
+  }
+}
+class FakeVAD extends VAD {
+  label = 'fake-vad';
+  private events: VADEvent[];
+  constructor(events: VADEvent[]) {
+    super({ updateInterval: 1 });
+    this.events = events;
+  }
+  stream(): any {
+    return new FakeVADStream(this.events);
+  }
+}
+const alwaysTrueTurnDetector: _TurnDetector = {
+  supportsLanguage: async () => true,
+  unlikelyThreshold: async () => undefined,
+  predictEndOfTurn: async () => 1.0,
+};
+describe('AudioRecognition user_turn span parity', () => {
+  initializeLogger({ pretty: false, level: 'silent' });
+  it('creates user_turn and parents eou_detection under it (stt mode)', async () => {
+    const { exporter } = setupInMemoryTracing();
+    const hooks = {
+      onStartOfSpeech: vi.fn(),
+      onVADInferenceDone: vi.fn(),
+      onEndOfSpeech: vi.fn(),
+      onInterimTranscript: vi.fn(),
+      onFinalTranscript: vi.fn(),
+      onPreemptiveGeneration: vi.fn(),
+      retrieveChatCtx: () =>
+        ({
+          copy() {
+            return this;
+          },
+          addMessage() {},
+          toJSON() {
+            return { items: [] };
+          },
+        }) as any,
+      onEndOfTurn: vi.fn(async () => true),
+    };
+    const sttEvents: SpeechEvent[] = [
+      { type: SpeechEventType.START_OF_SPEECH },
+      {
+        type: SpeechEventType.FINAL_TRANSCRIPT,
+        alternatives: [
+          {
+            language: 'en',
+            text: 'hello',
+            startTime: 0,
+            endTime: 0,
+            confidence: 0.9,
+          },
+        ],
+      },
+      { type: SpeechEventType.END_OF_SPEECH },
+    ];
+    const sttNode = async () =>
+      new ReadableStream<SpeechEvent>({
+        start(controller) {
+          for (const ev of sttEvents) controller.enqueue(ev);
+          controller.close();
+        },
+      });
+    const ar = new AudioRecognition({
+      recognitionHooks: hooks as any,
+      stt: sttNode as any,
+      vad: undefined,
+      turnDetector: alwaysTrueTurnDetector,
+      turnDetectionMode: 'stt',
+      minEndpointingDelay: 0,
+      maxEndpointingDelay: 0,
+      sttModel: 'deepgram-nova2',
+      sttProvider: 'deepgram',
+      getLinkedParticipant: () => ({ sid: 'p1', identity: 'bob', kind: ParticipantKind.AGENT }),
+    });
+    await ar.start();
+    // allow background task to drain
+    await new Promise((r) => setTimeout(r, 20));
+    await ar.close();
+    const spans = exporter.getFinishedSpans();
+    const userTurn = spanByName(spans, 'user_turn');
+    const eou = spanByName(spans, 'eou_detection');
+    expect(userTurn, 'user_turn span missing').toBeTruthy();
+    expect(eou, 'eou_detection span missing').toBeTruthy();
+    expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
+    // creation-time attributes
+    expect(userTurn.attributes['lk.participant_id']).toBe('p1');
+    expect(userTurn.attributes['lk.participant_identity']).toBe('bob');
+    expect(userTurn.attributes['lk.participant_kind']).toBe('AGENT');
+    expect(userTurn.attributes['gen_ai.request.model']).toBe('deepgram-nova2');
+    expect(userTurn.attributes['gen_ai.provider.name']).toBe('deepgram');
+    // end-of-turn attributes
+    expect(userTurn.attributes['lk.user_transcript']).toContain('hello');
+    expect(userTurn.attributes['lk.transcript_confidence']).toBeGreaterThan(0);
+  });
+  it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => {
+    const { exporter } = setupInMemoryTracing();
+    const hooks = {
+      onStartOfSpeech: vi.fn(),
+      onVADInferenceDone: vi.fn(),
+      onEndOfSpeech: vi.fn(),
+      onInterimTranscript: vi.fn(),
+      onFinalTranscript: vi.fn(),
+      onPreemptiveGeneration: vi.fn(),
+      retrieveChatCtx: () =>
+        ({
+          copy() {
+            return this;
+          },
+          addMessage() {},
+          toJSON() {
+            return { items: [] };
+          },
+        }) as any,
+      onEndOfTurn: vi.fn(async () => true),
+    };
+    const now = Date.now();
+    const vadEvents: VADEvent[] = [
+      {
+        type: VADEventType.START_OF_SPEECH,
+        samplesIndex: 0,
+        timestamp: now,
+        speechDuration: 100,
+        silenceDuration: 0,
+        frames: [],
+        probability: 0,
+        inferenceDuration: 0,
+        speaking: true,
+        rawAccumulatedSilence: 0,
+        rawAccumulatedSpeech: 0,
+      },
+      {
+        type: VADEventType.END_OF_SPEECH,
+        samplesIndex: 0,
+        timestamp: now + 200,
+        speechDuration: 100,
+        silenceDuration: 100,
+        frames: [],
+        probability: 0,
+        inferenceDuration: 0,
+        speaking: false,
+        rawAccumulatedSilence: 0,
+        rawAccumulatedSpeech: 0,
+      },
+    ];
+    const sttEvents: SpeechEvent[] = [
+      {
+        type: SpeechEventType.FINAL_TRANSCRIPT,
+        alternatives: [
+          {
+            language: 'en',
+            text: 'test',
+            startTime: 0,
+            endTime: 0,
+            confidence: 0.8,
+          },
+        ],
+      },
+    ];
+    const sttNode = async () =>
+      new ReadableStream<SpeechEvent>({
+        start(controller) {
+          for (const ev of sttEvents) controller.enqueue(ev);
+          controller.close();
+        },
+      });
+    const ar = new AudioRecognition({
+      recognitionHooks: hooks as any,
+      stt: sttNode as any,
+      vad: new FakeVAD(vadEvents) as any,
+      turnDetector: alwaysTrueTurnDetector,
+      turnDetectionMode: 'vad',
+      minEndpointingDelay: 0,
+      maxEndpointingDelay: 0,
+      sttModel: 'stt-model',
+      sttProvider: 'stt-provider',
+      getLinkedParticipant: () => ({ sid: 'p2', identity: 'alice', kind: ParticipantKind.AGENT }),
+    });
+    await ar.start();
+    await new Promise((r) => setTimeout(r, 20));
+    await ar.close();
+    const spans = exporter.getFinishedSpans();
+    const userTurn = spanByName(spans, 'user_turn');
+    const eou = spanByName(spans, 'eou_detection');
+    expect(userTurn).toBeTruthy();
+    expect(eou).toBeTruthy();
+    expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
+    expect(hooks.onStartOfSpeech).toHaveBeenCalled();
+    expect(hooks.onEndOfSpeech).toHaveBeenCalled();
+  });
+});

package/src/voice/generation.ts CHANGED

@@ -26,7 +26,13 @@ import { IdentityTransform } from '../stream/identity_transform.js';
 import { traceTypes, tracer } from '../telemetry/index.js';
 import { USERDATA_TIMED_TRANSCRIPT } from '../types.js';
 import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
-import { type Agent, type ModelSettings, asyncLocalStorage, isStopResponse } from './agent.js';
+import {
+  type Agent,
+  type ModelSettings,
+  _setActivityTaskInfo,
+  functionCallStorage,
+  isStopResponse,
+} from './agent.js';
 import type { AgentSession } from './agent_session.js';
 import {
   AudioOutput,
@@ -719,7 +725,7 @@ export interface _AudioOut {
 async function forwardAudio(
   ttsStream: ReadableStream<AudioFrame>,
-  audioOuput: AudioOutput,
+  audioOutput: AudioOutput,
   out: _AudioOut,
   signal?: AbortSignal,
 ): Promise<void> {
@@ -733,8 +739,8 @@ async function forwardAudio(
   };
   try {
-    audioOuput.on(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
-    audioOuput.resume();
+    audioOutput.on(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
+    audioOutput.resume();
     while (true) {
       if (signal?.aborted) {
@@ -748,36 +754,36 @@ async function forwardAudio(
       if (
         !out.firstFrameFut.done &&
-        audioOuput.sampleRate &&
-        audioOuput.sampleRate !== frame.sampleRate &&
+        audioOutput.sampleRate &&
+        audioOutput.sampleRate !== frame.sampleRate &&
         !resampler
       ) {
-        resampler = new AudioResampler(frame.sampleRate, audioOuput.sampleRate, 1);
+        resampler = new AudioResampler(frame.sampleRate, audioOutput.sampleRate, 1);
       }
       if (resampler) {
         for (const f of resampler.push(frame)) {
-          await audioOuput.captureFrame(f);
+          await audioOutput.captureFrame(f);
         }
       } else {
-        await audioOuput.captureFrame(frame);
+        await audioOutput.captureFrame(frame);
       }
     }
     if (resampler) {
       for (const f of resampler.flush()) {
-        await audioOuput.captureFrame(f);
+        await audioOutput.captureFrame(f);
       }
     }
   } finally {
-    audioOuput.off(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
+    audioOutput.off(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
     if (!out.firstFrameFut.done) {
       out.firstFrameFut.reject(new Error('audio forwarding cancelled before playback started'));
     }
     reader?.releaseLock();
-    audioOuput.flush();
+    audioOutput.flush();
   }
 }
@@ -836,7 +842,7 @@ export function performToolExecutions({
     const signal = controller.signal;
     const reader = toolCallStream.getReader();
-    const tasks: Promise<any>[] = [];
+    const tasks: Task<void>[] = [];
     while (!signal.aborted) {
       const { done, value: toolCall } = await reader.read();
       if (signal.aborted) break;
@@ -929,14 +935,6 @@ export function performToolExecutions({
         'Executing LLM tool call',
       );
-      const toolExecution = asyncLocalStorage.run({ functionCall: toolCall }, async () => {
-        return await tool.execute(parsedArgs, {
-          ctx: new RunContext(session, speechHandle, toolCall),
-          toolCallId: toolCall.callId,
-          abortSignal: signal,
-        });
-      });
       const _tracableToolExecutionImpl = async (toolExecTask: Promise<unknown>, span: Span) => {
         span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_NAME, toolCall.name);
         span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_ARGS, toolCall.args);
@@ -993,11 +991,42 @@ export function performToolExecutions({
           name: 'function_tool',
         });
+      const toolTask = Task.from(
+        async () => {
+          // Ensure this task is marked inline before user tool code executes.
+          const currentTask = Task.current();
+          if (currentTask) {
+            _setActivityTaskInfo(currentTask, {
+              speechHandle,
+              functionCall: toolCall,
+              inlineTask: true,
+            });
+          }
+          const toolExecution = functionCallStorage.run({ functionCall: toolCall }, async () => {
+            return await tool.execute(parsedArgs, {
+              ctx: new RunContext(session, speechHandle, toolCall),
+              toolCallId: toolCall.callId,
+              abortSignal: signal,
+            });
+          });
+          await tracableToolExecution(toolExecution);
+        },
+        controller,
+        `performToolExecution:${toolCall.name}`,
+      );
+      _setActivityTaskInfo(toolTask, {
+        speechHandle,
+        functionCall: toolCall,
+        inlineTask: true,
+      });
       // wait, not cancelling all tool calling tasks
-      tasks.push(tracableToolExecution(toolExecution));
+      tasks.push(toolTask);
     }
-    await Promise.allSettled(tasks);
+    await Promise.allSettled(tasks.map((task) => task.result));
     if (toolOutput.output.length > 0) {
       logger.debug(
         {

package/src/voice/index.ts CHANGED

@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { Agent, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
+export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
 export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js';
 export * from './avatar/index.js';
 export * from './background_audio.js';

package/src/voice/io.ts CHANGED

@@ -8,7 +8,7 @@ import type { ChatContext } from '../llm/chat_context.js';
 import type { ChatChunk } from '../llm/llm.js';
 import type { ToolContext } from '../llm/tool_context.js';
 import { log } from '../log.js';
-import { DeferredReadableStream } from '../stream/deferred_stream.js';
+import { MultiInputStream } from '../stream/multi_input_stream.js';
 import type { SpeechEvent } from '../stt/stt.js';
 import { Future } from '../utils.js';
 import type { ModelSettings } from './agent.js';
@@ -84,11 +84,14 @@ export interface AudioOutputCapabilities {
 }
 export abstract class AudioInput {
-  protected deferredStream: DeferredReadableStream<AudioFrame> =
-    new DeferredReadableStream<AudioFrame>();
+  protected multiStream: MultiInputStream<AudioFrame> = new MultiInputStream<AudioFrame>();
   get stream(): ReadableStream<AudioFrame> {
-    return this.deferredStream.stream;
+    return this.multiStream.stream;
+  }
+  async close(): Promise<void> {
+    await this.multiStream.close();
   }
   onAttached(): void {}

package/src/voice/recorder_io/recorder_io.ts CHANGED

@@ -105,6 +105,7 @@ export class RecorderIO {
       await this.outChan.close();
       await this.closeFuture.await;
       await cancelAndWait([this.forwardTask!, this.encodeTask!]);
+      await this.inRecord?.close();
       this.started = false;
     } finally {
@@ -378,7 +379,7 @@ class RecorderAudioInput extends AudioInput {
     this.source = source;
     // Set up the intercepting stream
-    this.deferredStream.setSource(this.createInterceptingStream());
+    this.multiStream.addInputStream(this.createInterceptingStream());
   }
   /**

package/src/voice/room_io/_input.ts CHANGED

@@ -1,9 +1,10 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-import { type AudioFrame, FrameProcessor } from '@livekit/rtc-node';
 import {
+  type AudioFrame,
   AudioStream,
+  FrameProcessor,
   type NoiseCancellationOptions,
   RemoteParticipant,
   type RemoteTrack,
@@ -25,7 +26,9 @@ export class ParticipantAudioInputStream extends AudioInput {
   private frameProcessor?: FrameProcessor<AudioFrame>;
   private publication: RemoteTrackPublication | null = null;
   private participantIdentity: string | null = null;
+  private currentInputId: string | null = null;
   private logger = log();
   constructor({
     room,
     sampleRate,
@@ -121,8 +124,9 @@ export class ParticipantAudioInputStream extends AudioInput {
   };
   private closeStream() {
-    if (this.deferredStream.isSourceSet) {
-      this.deferredStream.detachSource();
+    if (this.currentInputId) {
+      void this.multiStream.removeInputStream(this.currentInputId);
+      this.currentInputId = null;
     }
     this.publication = null;
@@ -143,7 +147,7 @@ export class ParticipantAudioInputStream extends AudioInput {
     }
     this.closeStream();
     this.publication = publication;
-    this.deferredStream.setSource(
+    this.currentInputId = this.multiStream.addInputStream(
       resampleStream({
         stream: this.createStream(track),
         outputRate: this.sampleRate,
@@ -179,14 +183,14 @@ export class ParticipantAudioInputStream extends AudioInput {
     }) as unknown as ReadableStream<AudioFrame>;
   }
-  async close() {
+  override async close() {
     this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);
     this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);
     this.room.off(RoomEvent.TokenRefreshed, this.onTokenRefreshed);
     this.closeStream();
+    await super.close();
     this.frameProcessor?.close();
     this.frameProcessor = undefined;
-    // Ignore errors - stream may be locked by RecorderIO or already cancelled
-    await this.deferredStream.stream.cancel().catch(() => {});
   }
 }

package/src/voice/room_io/room_io.ts CHANGED

@@ -376,6 +376,18 @@ export class RoomIO {
     return this.participantAvailableFuture.done;
   }
+  get linkedParticipant(): RemoteParticipant | undefined {
+    if (!this.isParticipantAvailable) {
+      return undefined;
+    }
+    return this.participantAvailableFuture.result;
+  }
+  get localParticipant(): Participant | undefined {
+    return this.room.localParticipant ?? undefined;
+  }
   /** Switch to a different participant */
   setParticipant(participantIdentity: string | null) {
     this.logger.debug({ participantIdentity }, 'setting participant');

package/src/voice/speech_handle.ts CHANGED

@@ -5,7 +5,7 @@ import type { Context } from '@opentelemetry/api';
 import type { ChatItem } from '../llm/index.js';
 import type { Task } from '../utils.js';
 import { Event, Future, shortuuid } from '../utils.js';
-import { asyncLocalStorage } from './agent.js';
+import { functionCallStorage } from './agent.js';
 /** Symbol used to identify SpeechHandle instances */
 const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
@@ -46,6 +46,9 @@ export class SpeechHandle {
   /** @internal - OpenTelemetry context for the agent turn span */
   _agentTurnContext?: Context;
+  /** @internal - used by AgentTask/RunResult final output plumbing */
+  _maybeRunFinalOutput?: unknown;
   private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
   private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
@@ -148,7 +151,7 @@ export class SpeechHandle {
    * has entirely played out, including any tool calls and response follow-ups.
    */
   async waitForPlayout(): Promise<void> {
-    const store = asyncLocalStorage.getStore();
+    const store = functionCallStorage.getStore();
     if (store && store?.functionCall) {
       throw new Error(
         `Cannot call 'SpeechHandle.waitForPlayout()' from inside the function tool '${store.functionCall.name}'. ` +
@@ -167,6 +170,10 @@ export class SpeechHandle {
   }
   addDoneCallback(callback: (sh: SpeechHandle) => void) {
+    if (this.done()) {
+      queueMicrotask(() => callback(this));
+      return;
+    }
     this.doneCallbacks.add(callback);
   }