npm - @livekit/agents - Versions diffs - 1.0.45 → 1.0.47 - Mend

@livekit/agents 1.0.45 → 1.0.47

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (225)

package/dist/cli.cjs +14 -20
package/dist/cli.cjs.map +1 -1
package/dist/cli.d.ts.map +1 -1
package/dist/cli.js +14 -20
package/dist/cli.js.map +1 -1
package/dist/ipc/job_proc_lazy_main.cjs +14 -5
package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
package/dist/ipc/job_proc_lazy_main.js +14 -5
package/dist/ipc/job_proc_lazy_main.js.map +1 -1
package/dist/llm/chat_context.cjs +19 -0
package/dist/llm/chat_context.cjs.map +1 -1
package/dist/llm/chat_context.d.cts +4 -0
package/dist/llm/chat_context.d.ts +4 -0
package/dist/llm/chat_context.d.ts.map +1 -1
package/dist/llm/chat_context.js +19 -0
package/dist/llm/chat_context.js.map +1 -1
package/dist/llm/provider_format/index.cjs +2 -0
package/dist/llm/provider_format/index.cjs.map +1 -1
package/dist/llm/provider_format/index.d.cts +1 -1
package/dist/llm/provider_format/index.d.ts +1 -1
package/dist/llm/provider_format/index.d.ts.map +1 -1
package/dist/llm/provider_format/index.js +6 -1
package/dist/llm/provider_format/index.js.map +1 -1
package/dist/llm/provider_format/openai.cjs +82 -2
package/dist/llm/provider_format/openai.cjs.map +1 -1
package/dist/llm/provider_format/openai.d.cts +1 -0
package/dist/llm/provider_format/openai.d.ts +1 -0
package/dist/llm/provider_format/openai.d.ts.map +1 -1
package/dist/llm/provider_format/openai.js +80 -1
package/dist/llm/provider_format/openai.js.map +1 -1
package/dist/llm/provider_format/openai.test.cjs +326 -0
package/dist/llm/provider_format/openai.test.cjs.map +1 -1
package/dist/llm/provider_format/openai.test.js +327 -1
package/dist/llm/provider_format/openai.test.js.map +1 -1
package/dist/llm/provider_format/utils.cjs +4 -3
package/dist/llm/provider_format/utils.cjs.map +1 -1
package/dist/llm/provider_format/utils.d.ts.map +1 -1
package/dist/llm/provider_format/utils.js +4 -3
package/dist/llm/provider_format/utils.js.map +1 -1
package/dist/llm/realtime.cjs.map +1 -1
package/dist/llm/realtime.d.cts +1 -0
package/dist/llm/realtime.d.ts +1 -0
package/dist/llm/realtime.d.ts.map +1 -1
package/dist/llm/realtime.js.map +1 -1
package/dist/log.cjs +5 -2
package/dist/log.cjs.map +1 -1
package/dist/log.d.ts.map +1 -1
package/dist/log.js +5 -2
package/dist/log.js.map +1 -1
package/dist/stream/deferred_stream.cjs +15 -6
package/dist/stream/deferred_stream.cjs.map +1 -1
package/dist/stream/deferred_stream.d.ts.map +1 -1
package/dist/stream/deferred_stream.js +15 -6
package/dist/stream/deferred_stream.js.map +1 -1
package/dist/stream/index.cjs +3 -0
package/dist/stream/index.cjs.map +1 -1
package/dist/stream/index.d.cts +1 -0
package/dist/stream/index.d.ts +1 -0
package/dist/stream/index.d.ts.map +1 -1
package/dist/stream/index.js +2 -0
package/dist/stream/index.js.map +1 -1
package/dist/stream/multi_input_stream.cjs +139 -0
package/dist/stream/multi_input_stream.cjs.map +1 -0
package/dist/stream/multi_input_stream.d.cts +55 -0
package/dist/stream/multi_input_stream.d.ts +55 -0
package/dist/stream/multi_input_stream.d.ts.map +1 -0
package/dist/stream/multi_input_stream.js +115 -0
package/dist/stream/multi_input_stream.js.map +1 -0
package/dist/stream/multi_input_stream.test.cjs +340 -0
package/dist/stream/multi_input_stream.test.cjs.map +1 -0
package/dist/stream/multi_input_stream.test.js +339 -0
package/dist/stream/multi_input_stream.test.js.map +1 -0
package/dist/telemetry/trace_types.cjs +42 -0
package/dist/telemetry/trace_types.cjs.map +1 -1
package/dist/telemetry/trace_types.d.cts +14 -0
package/dist/telemetry/trace_types.d.ts +14 -0
package/dist/telemetry/trace_types.d.ts.map +1 -1
package/dist/telemetry/trace_types.js +28 -0
package/dist/telemetry/trace_types.js.map +1 -1
package/dist/utils.cjs +44 -2
package/dist/utils.cjs.map +1 -1
package/dist/utils.d.cts +8 -0
package/dist/utils.d.ts +8 -0
package/dist/utils.d.ts.map +1 -1
package/dist/utils.js +44 -2
package/dist/utils.js.map +1 -1
package/dist/utils.test.cjs +71 -0
package/dist/utils.test.cjs.map +1 -1
package/dist/utils.test.js +71 -0
package/dist/utils.test.js.map +1 -1
package/dist/version.cjs +1 -1
package/dist/version.cjs.map +1 -1
package/dist/version.d.cts +1 -1
package/dist/version.d.ts +1 -1
package/dist/version.d.ts.map +1 -1
package/dist/version.js +1 -1
package/dist/version.js.map +1 -1
package/dist/voice/agent.cjs +144 -12
package/dist/voice/agent.cjs.map +1 -1
package/dist/voice/agent.d.cts +29 -4
package/dist/voice/agent.d.ts +29 -4
package/dist/voice/agent.d.ts.map +1 -1
package/dist/voice/agent.js +140 -11
package/dist/voice/agent.js.map +1 -1
package/dist/voice/agent.test.cjs +120 -0
package/dist/voice/agent.test.cjs.map +1 -1
package/dist/voice/agent.test.js +122 -2
package/dist/voice/agent.test.js.map +1 -1
package/dist/voice/agent_activity.cjs +402 -292
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.cts +35 -7
package/dist/voice/agent_activity.d.ts +35 -7
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +402 -287
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +156 -44
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.cts +22 -9
package/dist/voice/agent_session.d.ts +22 -9
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +156 -44
package/dist/voice/agent_session.js.map +1 -1
package/dist/voice/audio_recognition.cjs +89 -36
package/dist/voice/audio_recognition.cjs.map +1 -1
package/dist/voice/audio_recognition.d.cts +22 -1
package/dist/voice/audio_recognition.d.ts +22 -1
package/dist/voice/audio_recognition.d.ts.map +1 -1
package/dist/voice/audio_recognition.js +93 -36
package/dist/voice/audio_recognition.js.map +1 -1
package/dist/voice/audio_recognition_span.test.cjs +233 -0
package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
package/dist/voice/audio_recognition_span.test.js +232 -0
package/dist/voice/audio_recognition_span.test.js.map +1 -0
package/dist/voice/generation.cjs +39 -19
package/dist/voice/generation.cjs.map +1 -1
package/dist/voice/generation.d.ts.map +1 -1
package/dist/voice/generation.js +44 -20
package/dist/voice/generation.js.map +1 -1
package/dist/voice/index.cjs +2 -0
package/dist/voice/index.cjs.map +1 -1
package/dist/voice/index.d.cts +1 -1
package/dist/voice/index.d.ts +1 -1
package/dist/voice/index.d.ts.map +1 -1
package/dist/voice/index.js +2 -1
package/dist/voice/index.js.map +1 -1
package/dist/voice/io.cjs +6 -3
package/dist/voice/io.cjs.map +1 -1
package/dist/voice/io.d.cts +3 -2
package/dist/voice/io.d.ts +3 -2
package/dist/voice/io.d.ts.map +1 -1
package/dist/voice/io.js +6 -3
package/dist/voice/io.js.map +1 -1
package/dist/voice/recorder_io/recorder_io.cjs +3 -1
package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
package/dist/voice/recorder_io/recorder_io.js +3 -1
package/dist/voice/recorder_io/recorder_io.js.map +1 -1
package/dist/voice/room_io/_input.cjs +17 -17
package/dist/voice/room_io/_input.cjs.map +1 -1
package/dist/voice/room_io/_input.d.cts +2 -2
package/dist/voice/room_io/_input.d.ts +2 -2
package/dist/voice/room_io/_input.d.ts.map +1 -1
package/dist/voice/room_io/_input.js +7 -6
package/dist/voice/room_io/_input.js.map +1 -1
package/dist/voice/room_io/room_io.cjs +9 -0
package/dist/voice/room_io/room_io.cjs.map +1 -1
package/dist/voice/room_io/room_io.d.cts +3 -1
package/dist/voice/room_io/room_io.d.ts +3 -1
package/dist/voice/room_io/room_io.d.ts.map +1 -1
package/dist/voice/room_io/room_io.js +9 -0
package/dist/voice/room_io/room_io.js.map +1 -1
package/dist/voice/speech_handle.cjs +7 -1
package/dist/voice/speech_handle.cjs.map +1 -1
package/dist/voice/speech_handle.d.cts +2 -0
package/dist/voice/speech_handle.d.ts +2 -0
package/dist/voice/speech_handle.d.ts.map +1 -1
package/dist/voice/speech_handle.js +8 -2
package/dist/voice/speech_handle.js.map +1 -1
package/dist/voice/testing/run_result.cjs +66 -15
package/dist/voice/testing/run_result.cjs.map +1 -1
package/dist/voice/testing/run_result.d.cts +14 -3
package/dist/voice/testing/run_result.d.ts +14 -3
package/dist/voice/testing/run_result.d.ts.map +1 -1
package/dist/voice/testing/run_result.js +66 -15
package/dist/voice/testing/run_result.js.map +1 -1
package/dist/voice/utils.cjs +47 -0
package/dist/voice/utils.cjs.map +1 -0
package/dist/voice/utils.d.cts +4 -0
package/dist/voice/utils.d.ts +4 -0
package/dist/voice/utils.d.ts.map +1 -0
package/dist/voice/utils.js +23 -0
package/dist/voice/utils.js.map +1 -0
package/package.json +1 -1
package/src/cli.ts +20 -33
package/src/ipc/job_proc_lazy_main.ts +16 -5
package/src/llm/chat_context.ts +35 -0
package/src/llm/provider_format/index.ts +7 -2
package/src/llm/provider_format/openai.test.ts +385 -1
package/src/llm/provider_format/openai.ts +103 -0
package/src/llm/provider_format/utils.ts +6 -4
package/src/llm/realtime.ts +1 -0
package/src/log.ts +5 -2
package/src/stream/deferred_stream.ts +17 -6
package/src/stream/index.ts +1 -0
package/src/stream/multi_input_stream.test.ts +540 -0
package/src/stream/multi_input_stream.ts +172 -0
package/src/telemetry/trace_types.ts +18 -0
package/src/utils.test.ts +87 -0
package/src/utils.ts +52 -2
package/src/version.ts +1 -1
package/src/voice/agent.test.ts +140 -2
package/src/voice/agent.ts +189 -10
package/src/voice/agent_activity.ts +449 -286
package/src/voice/agent_session.ts +195 -51
package/src/voice/audio_recognition.ts +118 -38
package/src/voice/audio_recognition_span.test.ts +261 -0
package/src/voice/generation.ts +52 -23
package/src/voice/index.ts +1 -1
package/src/voice/io.ts +7 -4
package/src/voice/recorder_io/recorder_io.ts +2 -1
package/src/voice/room_io/_input.ts +11 -7
package/src/voice/room_io/room_io.ts +12 -0
package/src/voice/speech_handle.ts +9 -2
package/src/voice/testing/run_result.ts +81 -23
package/src/voice/utils.ts +29 -0

package/dist/voice/agent_activity.cjs (changed)

@@ -18,7 +18,8 @@ var __copyProps = (to, from, except, desc) => {
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 var agent_activity_exports = {};
 __export(agent_activity_exports, {
-  AgentActivity: () => AgentActivity
+  AgentActivity: () => AgentActivity,
+  agentActivityStorage: () => agentActivityStorage
 });
 module.exports = __toCommonJS(agent_activity_exports);
 var import_mutex = require("@livekit/mutex");
@@ -30,7 +31,7 @@ var import_chat_context = require("../llm/chat_context.cjs");
 var import_llm = require("../llm/index.cjs");
 var import_tool_context = require("../llm/tool_context.cjs");
 var import_log = require("../log.cjs");
-var import_deferred_stream = require("../stream/deferred_stream.cjs");
+var import_multi_input_stream = require("../stream/multi_input_stream.cjs");
 var import_stt = require("../stt/stt.cjs");
 var import_telemetry = require("../telemetry/index.cjs");
 var import_word = require("../tokenize/basic/word.cjs");
@@ -43,8 +44,11 @@ var import_audio_recognition = require("./audio_recognition.cjs");
 var import_events = require("./events.cjs");
 var import_generation = require("./generation.cjs");
 var import_speech_handle = require("./speech_handle.cjs");
-const speechHandleStorage = new import_node_async_hooks.AsyncLocalStorage();
+var import_utils2 = require("./utils.cjs");
+const agentActivityStorage = new import_node_async_hooks.AsyncLocalStorage();
 class AgentActivity {
+  agent;
+  agentSession;
   static REPLY_TASK_CANCEL_TIMEOUT = 5e3;
   started = false;
   audioRecognition;
@@ -53,22 +57,29 @@ class AgentActivity {
   // Maps response_id to OTEL span for metrics recording
   turnDetectionMode;
   logger = (0, import_log.log)();
-  _draining = false;
+  _schedulingPaused = true;
+  _drainBlockedTasks = [];
   _currentSpeech;
   speechQueue;
   // [priority, timestamp, speechHandle]
   q_updated;
   speechTasks = /* @__PURE__ */ new Set();
   lock = new import_mutex.Mutex();
-  audioStream = new import_deferred_stream.DeferredReadableStream();
+  audioStream = new import_multi_input_stream.MultiInputStream();
+  audioStreamId;
   // default to null as None, which maps to the default provider tool choice value
   toolChoice = null;
   _preemptiveGeneration;
-  agent;
-  agentSession;
   /** @internal */
   _mainTask;
+  _onEnterTask;
+  _onExitTask;
   _userTurnCompletedTask;
+  onRealtimeGenerationCreated = (ev) => this.onGenerationCreated(ev);
+  onRealtimeInputSpeechStarted = (ev) => this.onInputSpeechStarted(ev);
+  onRealtimeInputSpeechStopped = (ev) => this.onInputSpeechStopped(ev);
+  onRealtimeInputAudioTranscriptionCompleted = (ev) => this.onInputAudioTranscriptionCompleted(ev);
+  onModelError = (ev) => this.onError(ev);
   constructor(agent, agentSession) {
     this.agent = agent;
     this.agentSession = agentSession;
@@ -79,7 +90,7 @@ class AgentActivity {
     this.turnDetectionMode = typeof this.turnDetection === "string" ? this.turnDetection : void 0;
     if (this.turnDetectionMode === "vad" && this.vad === void 0) {
       this.logger.warn(
-        'turnDetection is set to "vad", but no VAD model is provided, ignoring the turnDdetection setting'
+        'turnDetection is set to "vad", but no VAD model is provided, ignoring the turnDetection setting'
       );
       this.turnDetectionMode = void 0;
     }
@@ -131,98 +142,119 @@ class AgentActivity {
   async start() {
     const unlock = await this.lock.lock();
     try {
-      const startSpan = import_telemetry.tracer.startSpan({
-        name: "start_agent_activity",
-        attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
-        context: import_api.ROOT_CONTEXT
-      });
-      this.agent._agentActivity = this;
-      if (this.llm instanceof import_llm.RealtimeModel) {
-        this.realtimeSession = this.llm.session();
-        this.realtimeSpans = /* @__PURE__ */ new Map();
-        this.realtimeSession.on("generation_created", (ev) => this.onGenerationCreated(ev));
-        this.realtimeSession.on("input_speech_started", (ev) => this.onInputSpeechStarted(ev));
-        this.realtimeSession.on("input_speech_stopped", (ev) => this.onInputSpeechStopped(ev));
-        this.realtimeSession.on(
-          "input_audio_transcription_completed",
-          (ev) => this.onInputAudioTranscriptionCompleted(ev)
-        );
-        this.realtimeSession.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
-        this.realtimeSession.on("error", (ev) => this.onError(ev));
-        (0, import_generation.removeInstructions)(this.agent._chatCtx);
-        try {
-          await this.realtimeSession.updateInstructions(this.agent.instructions);
-        } catch (error) {
-          this.logger.error(error, "failed to update the instructions");
-        }
-        try {
-          await this.realtimeSession.updateChatCtx(this.agent.chatCtx);
-        } catch (error) {
-          this.logger.error(error, "failed to update the chat context");
-        }
-        try {
-          await this.realtimeSession.updateTools(this.tools);
-        } catch (error) {
-          this.logger.error(error, "failed to update the tools");
-        }
-        if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
-          this.logger.error(
-            "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
-          );
-        }
-      } else if (this.llm instanceof import_llm.LLM) {
-        try {
-          (0, import_generation.updateInstructions)({
-            chatCtx: this.agent._chatCtx,
-            instructions: this.agent.instructions,
-            addIfMissing: true
-          });
-        } catch (error) {
-          this.logger.error("failed to update the instructions", error);
-        }
+      await this._startSession({ spanName: "start_agent_activity", runOnEnter: true });
+    } finally {
+      unlock();
+    }
+  }
+  async resume() {
+    const unlock = await this.lock.lock();
+    try {
+      await this._startSession({ spanName: "resume_agent_activity", runOnEnter: false });
+    } finally {
+      unlock();
+    }
+  }
+  async _startSession(options) {
+    var _a;
+    const { spanName, runOnEnter } = options;
+    const startSpan = import_telemetry.tracer.startSpan({
+      name: spanName,
+      attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
+      context: import_api.ROOT_CONTEXT
+    });
+    this.agent._agentActivity = this;
+    if (this.llm instanceof import_llm.RealtimeModel) {
+      this.realtimeSession = this.llm.session();
+      this.realtimeSpans = /* @__PURE__ */ new Map();
+      this.realtimeSession.on("generation_created", this.onRealtimeGenerationCreated);
+      this.realtimeSession.on("input_speech_started", this.onRealtimeInputSpeechStarted);
+      this.realtimeSession.on("input_speech_stopped", this.onRealtimeInputSpeechStopped);
+      this.realtimeSession.on(
+        "input_audio_transcription_completed",
+        this.onRealtimeInputAudioTranscriptionCompleted
+      );
+      this.realtimeSession.on("metrics_collected", this.onMetricsCollected);
+      this.realtimeSession.on("error", this.onModelError);
+      (0, import_generation.removeInstructions)(this.agent._chatCtx);
+      try {
+        await this.realtimeSession.updateInstructions(this.agent.instructions);
+      } catch (error) {
+        this.logger.error(error, "failed to update the instructions");
       }
-      if (this.llm instanceof import_llm.LLM) {
-        this.llm.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
-        this.llm.on("error", (ev) => this.onError(ev));
+      try {
+        await this.realtimeSession.updateChatCtx(this.agent.chatCtx);
+      } catch (error) {
+        this.logger.error(error, "failed to update the chat context");
       }
-      if (this.stt instanceof import_stt.STT) {
-        this.stt.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
-        this.stt.on("error", (ev) => this.onError(ev));
+      try {
+        await this.realtimeSession.updateTools(this.tools);
+      } catch (error) {
+        this.logger.error(error, "failed to update the tools");
       }
-      if (this.tts instanceof import_tts.TTS) {
-        this.tts.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
-        this.tts.on("error", (ev) => this.onError(ev));
+      if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
+        this.logger.error(
+          "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
+        );
       }
-      if (this.vad instanceof import_vad.VAD) {
-        this.vad.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
+    } else if (this.llm instanceof import_llm.LLM) {
+      try {
+        (0, import_generation.updateInstructions)({
+          chatCtx: this.agent._chatCtx,
+          instructions: this.agent.instructions,
+          addIfMissing: true
+        });
+      } catch (error) {
+        this.logger.error("failed to update the instructions", error);
       }
-      this.audioRecognition = new import_audio_recognition.AudioRecognition({
-        recognitionHooks: this,
-        // Disable stt node if stt is not provided
-        stt: this.stt ? (...args) => this.agent.sttNode(...args) : void 0,
-        vad: this.vad,
-        turnDetector: typeof this.turnDetection === "string" ? void 0 : this.turnDetection,
-        turnDetectionMode: this.turnDetectionMode,
-        minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
-        maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
-        rootSpanContext: this.agentSession.rootSpanContext
-      });
-      this.audioRecognition.start();
-      this.started = true;
-      this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal));
-      const onEnterTask = import_telemetry.tracer.startActiveSpan(async () => this.agent.onEnter(), {
-        name: "on_enter",
-        context: import_api.trace.setSpan(import_api.ROOT_CONTEXT, startSpan),
-        attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
-      });
-      this.createSpeechTask({
-        task: import_utils.Task.from(() => onEnterTask),
+    }
+    if (this.llm instanceof import_llm.LLM) {
+      this.llm.on("metrics_collected", this.onMetricsCollected);
+      this.llm.on("error", this.onModelError);
+    }
+    if (this.stt instanceof import_stt.STT) {
+      this.stt.on("metrics_collected", this.onMetricsCollected);
+      this.stt.on("error", this.onModelError);
+    }
+    if (this.tts instanceof import_tts.TTS) {
+      this.tts.on("metrics_collected", this.onMetricsCollected);
+      this.tts.on("error", this.onModelError);
+    }
+    if (this.vad instanceof import_vad.VAD) {
+      this.vad.on("metrics_collected", this.onMetricsCollected);
+    }
+    this.audioRecognition = new import_audio_recognition.AudioRecognition({
+      recognitionHooks: this,
+      // Disable stt node if stt is not provided
+      stt: this.stt ? (...args) => this.agent.sttNode(...args) : void 0,
+      vad: this.vad,
+      turnDetector: typeof this.turnDetection === "string" ? void 0 : this.turnDetection,
+      turnDetectionMode: this.turnDetectionMode,
+      minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
+      maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
+      rootSpanContext: this.agentSession.rootSpanContext,
+      sttModel: (_a = this.stt) == null ? void 0 : _a.label,
+      sttProvider: this.getSttProvider(),
+      getLinkedParticipant: () => {
+        var _a2;
+        return (_a2 = this.agentSession._roomIO) == null ? void 0 : _a2.linkedParticipant;
+      }
+    });
+    this.audioRecognition.start();
+    this.started = true;
+    this._resumeSchedulingTask();
+    if (runOnEnter) {
+      this._onEnterTask = this.createSpeechTask({
+        taskFn: () => import_telemetry.tracer.startActiveSpan(async () => this.agent.onEnter(), {
+          name: "on_enter",
+          context: import_api.trace.setSpan(import_api.ROOT_CONTEXT, startSpan),
+          attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
+        }),
+        inlineTask: true,
         name: "AgentActivity_onEnter"
       });
-      startSpan.end();
-    } finally {
-      unlock();
     }
+    startSpan.end();
   }
   get currentSpeech() {
     return this._currentSpeech;
@@ -233,6 +265,15 @@ class AgentActivity {
   get stt() {
     return this.agent.stt || this.agentSession.stt;
   }
+  getSttProvider() {
+    var _a;
+    const label = (_a = this.stt) == null ? void 0 : _a.label;
+    if (!label) {
+      return void 0;
+    }
+    const [provider] = label.split("-", 1);
+    return provider || label;
+  }
   get llm() {
     return this.agent.llm || this.agentSession.llm;
   }
@@ -242,8 +283,8 @@ class AgentActivity {
   get tools() {
     return this.agent.toolCtx;
   }
-  get draining() {
-    return this._draining;
+  get schedulingPaused() {
+    return this._schedulingPaused;
   }
   get realtimeLLMSession() {
     return this.realtimeSession;
@@ -283,11 +324,9 @@ class AgentActivity {
     }
   }
   attachAudioInput(audioStream) {
-    if (this.audioStream.isSourceSet) {
-      this.logger.debug("detaching existing audio input in agent activity");
-      this.audioStream.detachSource();
-    }
-    this.audioStream.setSource(audioStream);
+    void this.audioStream.close();
+    this.audioStream = new import_multi_input_stream.MultiInputStream();
+    this.audioStreamId = this.audioStream.addInputStream(audioStream);
     const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream.tee();
     if (this.realtimeSession) {
       this.realtimeSession.setInputAudioStream(realtimeAudioStream);
@@ -297,13 +336,21 @@ class AgentActivity {
     }
   }
   detachAudioInput() {
-    this.audioStream.detachSource();
+    if (this.audioStreamId === void 0) {
+      return;
+    }
+    void this.audioStream.close();
+    this.audioStream = new import_multi_input_stream.MultiInputStream();
+    this.audioStreamId = void 0;
   }
-  commitUserTurn() {
+  commitUserTurn(options = {}) {
+    const { audioDetached = false, throwIfNotReady = true } = options;
     if (!this.audioRecognition) {
-      throw new Error("AudioRecognition is not initialized");
+      if (throwIfNotReady) {
+        throw new Error("AudioRecognition is not initialized");
+      }
+      return;
     }
-    const audioDetached = false;
     this.audioRecognition.commitUserTurn(audioDetached);
   }
   clearUserTurn() {
@@ -339,19 +386,17 @@ class AgentActivity {
       })
     );
     const task = this.createSpeechTask({
-      task: import_utils.Task.from(
-        (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
-      ),
+      taskFn: (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio),
       ownedSpeechHandle: handle,
       name: "AgentActivity.say_tts"
     });
-    task.finally(() => this.onPipelineReplyDone());
+    task.result.finally(() => this.onPipelineReplyDone());
     this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
     return handle;
   }
   // -- Metrics and errors --
   onMetricsCollected = (ev) => {
-    const speechHandle = speechHandleStorage.getStore();
+    const speechHandle = import_agent.speechHandleStorage.getStore();
     if (speechHandle && (ev.type === "llm_metrics" || ev.type === "tts_metrics")) {
       ev.speechId = speechHandle.id;
     }
@@ -435,8 +480,8 @@ class AgentActivity {
     if (ev.userInitiated) {
       return;
     }
-    if (this.draining) {
-      this.logger.warn("skipping new realtime generation, the agent is draining");
+    if (this.schedulingPaused) {
+      this.logger.warn("skipping new realtime generation, the speech scheduling is not running");
       return;
     }
     const handle = import_speech_handle.SpeechHandle.create({
@@ -452,9 +497,7 @@ class AgentActivity {
     );
     this.logger.info({ speech_id: handle.id }, "Creating speech handle");
     this.createSpeechTask({
-      task: import_utils.Task.from(
-        (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
-      ),
+      taskFn: (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController),
       ownedSpeechHandle: handle,
       name: "AgentActivity.realtimeGeneration"
     });
@@ -541,7 +584,7 @@ class AgentActivity {
     }
   }
   onPreemptiveGeneration(info) {
-    if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
+    if (!this.agentSession.options.preemptiveGeneration || this.schedulingPaused || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
       return;
     }
     this.cancelPreemptiveGeneration();
@@ -579,7 +622,21 @@ class AgentActivity {
     }
   }
   createSpeechTask(options) {
-    const { task, ownedSpeechHandle } = options;
+    const { taskFn, controller, ownedSpeechHandle, inlineTask, name } = options;
+    const wrappedFn = (ctrl) => {
+      return agentActivityStorage.run(this, () => {
+        const currentTask = import_utils.Task.current();
+        if (currentTask) {
+          (0, import_agent._setActivityTaskInfo)(currentTask, { speechHandle: ownedSpeechHandle, inlineTask });
+        }
+        if (ownedSpeechHandle) {
+          return import_agent.speechHandleStorage.run(ownedSpeechHandle, () => taskFn(ctrl));
+        }
+        return taskFn(ctrl);
+      });
+    };
+    const task = import_utils.Task.from(wrappedFn, controller, name);
+    (0, import_agent._setActivityTaskInfo)(task, { speechHandle: ownedSpeechHandle, inlineTask });
     this.speechTasks.add(task);
     task.addDoneCallback(() => {
       this.speechTasks.delete(task);
@@ -595,12 +652,15 @@ class AgentActivity {
     task.addDoneCallback(() => {
       this.wakeupMainTask();
     });
-    return task.result;
+    return task;
   }
   async onEndOfTurn(info) {
-    if (this.draining) {
+    if (this.schedulingPaused) {
       this.cancelPreemptiveGeneration();
-      this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
+      this.logger.warn(
+        { user_input: info.newTranscript },
+        "skipping user input, speech scheduling is paused"
+      );
       return true;
     }
     if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0) {
@@ -619,7 +679,7 @@ class AgentActivity {
     }
     const oldTask = this._userTurnCompletedTask;
     this._userTurnCompletedTask = this.createSpeechTask({
-      task: import_utils.Task.from(() => this.userTurnCompleted(info, oldTask)),
+      taskFn: () => this.userTurnCompleted(info, oldTask),
       name: "AgentActivity.userTurnCompleted"
     });
     return true;
@@ -649,14 +709,41 @@ class AgentActivity {
         await speechHandle._waitForGeneration();
         this._currentSpeech = void 0;
       }
-      if (this.draining && this.speechTasks.size === 0) {
-        this.logger.info("mainTask: draining and no more speech tasks");
+      const toWait = this.getDrainPendingSpeechTasks();
+      if (this._schedulingPaused && toWait.length === 0) {
+        this.logger.info("mainTask: scheduling paused and no more speech tasks to wait");
         break;
       }
       this.q_updated = new import_utils.Future();
     }
     this.logger.info("AgentActivity mainTask: exiting");
   }
+  getDrainPendingSpeechTasks() {
+    const blockedHandles = [];
+    for (const task of this._drainBlockedTasks) {
+      const info = (0, import_agent._getActivityTaskInfo)(task);
+      if (!info) {
+        this.logger.error("blocked task without activity info; skipping.");
+        continue;
+      }
+      if (!info.speechHandle) {
+        continue;
+      }
+      blockedHandles.push(info.speechHandle);
+    }
+    const toWait = [];
+    for (const task of this.speechTasks) {
+      if (this._drainBlockedTasks.includes(task)) {
+        continue;
+      }
+      const info = (0, import_agent._getActivityTaskInfo)(task);
+      if (info && info.speechHandle && blockedHandles.includes(info.speechHandle)) {
+        continue;
+      }
+      toWait.push(task);
+    }
+    return toWait;
+  }
   wakeupMainTask() {
     this.q_updated.resolve();
   }
@@ -682,7 +769,7 @@ class AgentActivity {
     if (this.llm === void 0) {
       throw new Error("trying to generate reply without an LLM model");
     }
-    const functionCall = (_a = import_agent.asyncLocalStorage.getStore()) == null ? void 0 : _a.functionCall;
+    const functionCall = (_a = import_agent.functionCallStorage.getStore()) == null ? void 0 : _a.functionCall;
     if (toolChoice === void 0 && functionCall !== void 0) {
       toolChoice = "none";
     }
@@ -700,19 +787,17 @@ class AgentActivity {
     this.logger.info({ speech_id: handle.id }, "Creating speech handle");
     if (this.llm instanceof import_llm.RealtimeModel) {
       this.createSpeechTask({
-        task: import_utils.Task.from(
-          (abortController) => this.realtimeReplyTask({
-            speechHandle: handle,
-            // TODO(brian): support llm.ChatMessage for the realtime model
-            userInput: userMessage == null ? void 0 : userMessage.textContent,
-            instructions,
-            modelSettings: {
-              // isGiven(toolChoice) = toolChoice !== undefined
-              toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
-            },
-            abortController
-          })
-        ),
+        taskFn: (abortController) => this.realtimeReplyTask({
+          speechHandle: handle,
+          // TODO(brian): support llm.ChatMessage for the realtime model
+          userInput: userMessage == null ? void 0 : userMessage.textContent,
+          instructions,
+          modelSettings: {
+            // isGiven(toolChoice) = toolChoice !== undefined
+            toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
+          },
+          abortController
+        }),
         ownedSpeechHandle: handle,
         name: "AgentActivity.realtimeReply"
       });
@@ -722,36 +807,36 @@ class AgentActivity {
 ${instructions}`;
       }
       const task = this.createSpeechTask({
-        task: import_utils.Task.from(
-          (abortController) => this.pipelineReplyTask(
-            handle,
-            chatCtx ?? this.agent.chatCtx,
-            this.agent.toolCtx,
-            {
-              toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
-            },
-            abortController,
-            instructions,
-            userMessage
-          )
+        taskFn: (abortController) => this.pipelineReplyTask(
+          handle,
+          chatCtx ?? this.agent.chatCtx,
+          this.agent.toolCtx,
+          {
+            toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
+          },
+          abortController,
+          instructions,
+          userMessage
         ),
         ownedSpeechHandle: handle,
         name: "AgentActivity.pipelineReply"
       });
-      task.finally(() => this.onPipelineReplyDone());
+      task.result.finally(() => this.onPipelineReplyDone());
     }
     if (scheduleSpeech) {
       this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
     }
     return handle;
   }
-  interrupt() {
+  interrupt(options = {}) {
     var _a;
+    const { force = false } = options;
+    this.cancelPreemptiveGeneration();
     const future = new import_utils.Future();
     const currentSpeech = this._currentSpeech;
-    currentSpeech == null ? void 0 : currentSpeech.interrupt();
+    currentSpeech == null ? void 0 : currentSpeech.interrupt(force);
     for (const [_, __, speech] of this.speechQueue) {
-      speech.interrupt();
+      speech.interrupt(force);
     }
     (_a = this.realtimeSession) == null ? void 0 : _a.interrupt();
     if (currentSpeech === void 0) {
@@ -772,7 +857,7 @@ ${instructions}`;
   async userTurnCompleted(info, oldTask) {
     var _a, _b;
     if (oldTask) {
-      await oldTask;
+      await oldTask.result;
     }
     if (this.llm instanceof import_llm.RealtimeModel) {
       if (this.llm.capabilities.turnDetection) {
@@ -854,7 +939,7 @@ ${instructions}`;
   }
   async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
     speechHandle._agentTurnContext = import_api.context.active();
-    speechHandleStorage.enterWith(speechHandle);
+    import_agent.speechHandleStorage.enterWith(speechHandle);
     const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
     const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
     await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
@@ -959,7 +1044,7 @@ ${instructions}`;
     toolsMessages,
     span
   }) => {
-    var _a, _b, _c, _d;
+    var _a, _b;
     speechHandle._agentTurnContext = import_api.context.active();
     span.setAttribute(import_telemetry.traceTypes.ATTR_SPEECH_ID, speechHandle.id);
     if (instructions) {
@@ -968,7 +1053,11 @@ ${instructions}`;
     if (newMessage) {
       span.setAttribute(import_telemetry.traceTypes.ATTR_USER_INPUT, newMessage.textContent || "");
     }
-    speechHandleStorage.enterWith(speechHandle);
+    const localParticipant = (_a = this.agentSession._roomIO) == null ? void 0 : _a.localParticipant;
+    if (localParticipant) {
+      (0, import_utils2.setParticipantSpanAttributes)(span, localParticipant);
+    }
+    import_agent.speechHandleStorage.enterWith(speechHandle);
     const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
     const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
     chatCtx = chatCtx.copy();
@@ -1027,7 +1116,7 @@ ${instructions}`;
     speechHandle._clearAuthorization();
     const replyStartedAt = Date.now();
     let transcriptionInput = llmOutput;
-    if (this.useTtsAlignedTranscript && ((_a = this.tts) == null ? void 0 : _a.capabilities.alignedTranscript) && ttsGenData) {
+    if (this.useTtsAlignedTranscript && ((_b = this.tts) == null ? void 0 : _b.capabilities.alignedTranscript) && ttsGenData) {
       const timedTextsStream = await Promise.race([
         ttsGenData.timedTextsFut.await,
         (ttsTask == null ? void 0 : ttsTask.result.catch(
@@ -1101,11 +1190,11 @@ ${instructions}`;
       for (const msg of toolsMessages) {
         msg.createdAt = replyStartedAt;
       }
-      this.agent._chatCtx.insert(toolsMessages);
       const toolCallOutputs = toolsMessages.filter(
         (m) => m.type === "function_call_output"
       );
       if (toolCallOutputs.length > 0) {
+        this.agent._chatCtx.insert(toolCallOutputs);
         this.agentSession._toolItemsAdded(toolCallOutputs);
       }
     }
@@ -1193,45 +1282,15 @@ ${instructions}`;
       );
       return;
     }
-    const functionToolsExecutedEvent = (0, import_events.createFunctionToolsExecutedEvent)({
-      functionCalls: [],
-      functionCallOutputs: []
-    });
-    let shouldGenerateToolReply = false;
-    let newAgentTask = null;
-    let ignoreTaskSwitch = false;
-    for (const sanitizedOut of toolOutput.output) {
-      if (sanitizedOut.toolCallOutput !== void 0) {
-        functionToolsExecutedEvent.functionCalls.push(sanitizedOut.toolCall);
-        functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
-        if (sanitizedOut.replyRequired) {
-          shouldGenerateToolReply = true;
-        }
-      }
-      if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
-        this.logger.error("expected to receive only one agent task from the tool executions");
-        ignoreTaskSwitch = true;
-      }
-      newAgentTask = sanitizedOut.agentTask ?? null;
-      this.logger.debug(
-        {
-          speechId: speechHandle.id,
-          name: (_b = sanitizedOut.toolCall) == null ? void 0 : _b.name,
-          args: sanitizedOut.toolCall.args,
-          output: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.output,
-          isError: (_d = sanitizedOut.toolCallOutput) == null ? void 0 : _d.isError
-        },
-        "Tool call execution finished"
-      );
-    }
+    const { functionToolsExecutedEvent, shouldGenerateToolReply, newAgentTask, ignoreTaskSwitch } = this.summarizeToolExecutionOutput(toolOutput, speechHandle);
     this.agentSession.emit(
       import_events.AgentSessionEventTypes.FunctionToolsExecuted,
       functionToolsExecutedEvent
     );
-    let draining = this.draining;
+    let schedulingPaused = this.schedulingPaused;
     if (!ignoreTaskSwitch && newAgentTask !== null) {
       this.agentSession.updateAgent(newAgentTask);
-      draining = true;
+      schedulingPaused = true;
     }
     const toolMessages = [
       ...functionToolsExecutedEvent.functionCalls,
@@ -1240,34 +1299,32 @@ ${instructions}`;
     if (shouldGenerateToolReply) {
       chatCtx.insert(toolMessages);
       speechHandle._numSteps += 1;
-      const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
+      const respondToolChoice = schedulingPaused || modelSettings.toolChoice === "none" ? "none" : "auto";
       const toolResponseTask = this.createSpeechTask({
-        task: import_utils.Task.from(
-          () => this.pipelineReplyTask(
-            speechHandle,
-            chatCtx,
-            toolCtx,
-            { toolChoice: respondToolChoice },
-            replyAbortController,
-            instructions,
-            void 0,
-            toolMessages
-          )
+        taskFn: () => this.pipelineReplyTask(
+          speechHandle,
+          chatCtx,
+          toolCtx,
+          { toolChoice: respondToolChoice },
+          replyAbortController,
+          instructions,
+          void 0,
+          toolMessages
         ),
         ownedSpeechHandle: speechHandle,
         name: "AgentActivity.pipelineReply"
       });
-      toolResponseTask.finally(() => this.onPipelineReplyDone());
+      toolResponseTask.result.finally(() => this.onPipelineReplyDone());
       this.scheduleSpeech(speechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
     } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
       for (const msg of toolMessages) {
         msg.createdAt = replyStartedAt;
       }
-      this.agent._chatCtx.insert(toolMessages);
       const toolCallOutputs = toolMessages.filter(
         (m) => m.type === "function_call_output"
       );
       if (toolCallOutputs.length > 0) {
+        this.agent._chatCtx.insert(toolCallOutputs);
         this.agentSession._toolItemsAdded(toolCallOutputs);
       }
     }
@@ -1311,10 +1368,14 @@ ${instructions}`;
     replyAbortController,
     span
   }) {
-    var _a, _b, _c;
+    var _a;
     speechHandle._agentTurnContext = import_api.context.active();
     span.setAttribute(import_telemetry.traceTypes.ATTR_SPEECH_ID, speechHandle.id);
-    speechHandleStorage.enterWith(speechHandle);
+    const localParticipant = (_a = this.agentSession._roomIO) == null ? void 0 : _a.localParticipant;
+    if (localParticipant) {
+      (0, import_utils2.setParticipantSpanAttributes)(span, localParticipant);
+    }
+    import_agent.speechHandleStorage.enterWith(speechHandle);
     if (!this.realtimeSession) {
       throw new Error("realtime session is not initialized");
     }
@@ -1567,44 +1628,15 @@ ${instructions}`;
       );
       return;
     }
-    const functionToolsExecutedEvent = (0, import_events.createFunctionToolsExecutedEvent)({
-      functionCalls: [],
-      functionCallOutputs: []
-    });
-    let shouldGenerateToolReply = false;
-    let newAgentTask = null;
-    let ignoreTaskSwitch = false;
-    for (const sanitizedOut of toolOutput.output) {
-      if (sanitizedOut.toolCallOutput !== void 0) {
-        functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
-        if (sanitizedOut.replyRequired) {
-          shouldGenerateToolReply = true;
-        }
-      }
-      if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
-        this.logger.error("expected to receive only one agent task from the tool executions");
-        ignoreTaskSwitch = true;
-      }
-      newAgentTask = sanitizedOut.agentTask ?? null;
-      this.logger.debug(
-        {
-          speechId: speechHandle.id,
-          name: (_a = sanitizedOut.toolCall) == null ? void 0 : _a.name,
-          args: sanitizedOut.toolCall.args,
-          output: (_b = sanitizedOut.toolCallOutput) == null ? void 0 : _b.output,
-          isError: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.isError
-        },
-        "Tool call execution finished"
-      );
-    }
+    const { functionToolsExecutedEvent, shouldGenerateToolReply, newAgentTask, ignoreTaskSwitch } = this.summarizeToolExecutionOutput(toolOutput, speechHandle);
     this.agentSession.emit(
       import_events.AgentSessionEventTypes.FunctionToolsExecuted,
       functionToolsExecutedEvent
     );
-    let draining = this.draining;
+    let schedulingPaused = this.schedulingPaused;
     if (!ignoreTaskSwitch && newAgentTask !== null) {
       this.agentSession.updateAgent(newAgentTask);
-      draining = true;
+      schedulingPaused = true;
     }
     if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
       while (this.currentSpeech || this.speechQueue.size() > 0) {
@@ -1645,20 +1677,58 @@ ${instructions}`;
         speechHandle: replySpeechHandle
       })
     );
-    const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
+    const toolChoice = schedulingPaused || modelSettings.toolChoice === "none" ? "none" : "auto";
     this.createSpeechTask({
-      task: import_utils.Task.from(
-        (abortController) => this.realtimeReplyTask({
-          speechHandle: replySpeechHandle,
-          modelSettings: { toolChoice },
-          abortController
-        })
-      ),
+      taskFn: (abortController) => this.realtimeReplyTask({
+        speechHandle: replySpeechHandle,
+        modelSettings: { toolChoice },
+        abortController
+      }),
       ownedSpeechHandle: replySpeechHandle,
       name: "AgentActivity.realtime_reply"
     });
     this.scheduleSpeech(replySpeechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
   }
+  summarizeToolExecutionOutput(toolOutput, speechHandle) { // Walks toolOutput.output and returns { functionToolsExecutedEvent, shouldGenerateToolReply, newAgentTask, ignoreTaskSwitch }; speechHandle is only used for its id in the debug log.
+    var _a, _b, _c;
+    const functionToolsExecutedEvent = (0, import_events.createFunctionToolsExecutedEvent)({
+      functionCalls: [],
+      functionCallOutputs: []
+    });
+    let shouldGenerateToolReply = false;
+    let newAgentTask = null;
+    let ignoreTaskSwitch = false;
+    for (const sanitizedOut of toolOutput.output) {
+      if (sanitizedOut.toolCallOutput !== void 0) { // only executions that produced an output are recorded in the event
+        functionToolsExecutedEvent.functionCalls.push(sanitizedOut.toolCall);
+        functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
+        if (sanitizedOut.replyRequired) { // a single output requiring a reply is enough to request one
+          shouldGenerateToolReply = true;
+        }
+      }
+      if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) { // an agent task is already pending and this output carries one too
+        this.logger.error("expected to receive only one agent task from the tool executions");
+        ignoreTaskSwitch = true;
+      }
+      newAgentTask = sanitizedOut.agentTask ?? null; // NOTE(review): runs for every output, so an output without agentTask resets an earlier task to null — confirm this is intended
+      this.logger.debug(
+        {
+          speechId: speechHandle.id,
+          name: (_a = sanitizedOut.toolCall) == null ? void 0 : _a.name,
+          args: sanitizedOut.toolCall.args, // NOTE(review): unlike `name` above, this read has no guard for a missing toolCall
+          output: (_b = sanitizedOut.toolCallOutput) == null ? void 0 : _b.output,
+          isError: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.isError
+        },
+        "Tool call execution finished"
+      );
+    }
+    return {
+      functionToolsExecutedEvent,
+      shouldGenerateToolReply,
+      newAgentTask,
+      ignoreTaskSwitch
+    };
+  }
   async realtimeReplyTask({
     speechHandle,
     modelSettings: { toolChoice },
@@ -1666,7 +1736,7 @@ ${instructions}`;
     instructions,
     abortController
   }) {
-    speechHandleStorage.enterWith(speechHandle);
+    import_agent.speechHandleStorage.enterWith(speechHandle);
     if (!this.realtimeSession) {
       throw new Error("realtime session is not available");
     }
@@ -1700,13 +1770,45 @@ ${instructions}`;
     }
   }
   scheduleSpeech(speechHandle, priority, force = false) { // Queues speechHandle at the given priority, marks it scheduled, and wakes the main task.
-    if (this.draining && !force) {
-      throw new Error("cannot schedule new speech, the agent is draining");
+    if (this.schedulingPaused && !force) { // force = true bypasses the paused check
+      throw new Error("cannot schedule new speech, the speech scheduling is draining/pausing");
     }
     this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]); // entry is [priority, high-resolution timestamp, handle]; the timestamp is presumably the tie-breaker for equal priorities — ordering logic is not visible here
     speechHandle._markScheduled();
     this.wakeupMainTask();
   }
+  async _pauseSchedulingTask(blockedTasks) { // Marks scheduling as paused, stores blockedTasks in this._drainBlockedTasks, and waits for the main task to finish.
+    if (this._schedulingPaused) return; // already paused: the existing blocked-task list is kept
+    this._schedulingPaused = true;
+    this._drainBlockedTasks = blockedTasks;
+    this.wakeupMainTask(); // presumably so the main loop re-evaluates its exit condition
+    if (this._mainTask) {
+      await this._mainTask.result;
+    }
+  }
+  _resumeSchedulingTask() { // Clears the paused flag and starts a new main task; does nothing when scheduling is not paused.
+    if (!this._schedulingPaused) return;
+    this._schedulingPaused = false;
+    this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal)); // NOTE(review): this._drainBlockedTasks is not reset here — confirm that is intended
+  }
+  async pause(options = {}) { // Pauses speech scheduling and then closes session resources, all while holding this.lock.
+    const { blockedTasks = [] } = options; // tasks handed to _pauseSchedulingTask; defaults to none
+    const unlock = await this.lock.lock();
+    try {
+      const span = import_telemetry.tracer.startSpan({
+        name: "pause_agent_activity",
+        attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
+      });
+      try {
+        await this._pauseSchedulingTask(blockedTasks);
+        await this._closeSessionResources();
+      } finally {
+        span.end(); // the span is ended even if pausing or closing throws
+      }
+    } finally {
+      unlock(); // the lock is released on every path
+    }
+  }
   async drain() {
     return import_telemetry.tracer.startActiveSpan(async (span) => this._drainImpl(span), {
       name: "drain_agent_activity",
@@ -1714,72 +1816,80 @@ ${instructions}`;
     });
   }
   async _drainImpl(span) { // Drains the activity while holding this.lock: runs the agent's onExit hook as a speech task, then pauses scheduling.
-    var _a;
     span.setAttribute(import_telemetry.traceTypes.ATTR_AGENT_LABEL, this.agent.id);
     const unlock = await this.lock.lock();
     try {
-      if (this._draining) return;
-      this.cancelPreemptiveGeneration();
-      const onExitTask = import_telemetry.tracer.startActiveSpan(async () => this.agent.onExit(), {
-        name: "on_exit",
-        attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
-      });
-      this.createSpeechTask({
-        task: import_utils.Task.from(() => onExitTask),
+      if (this._schedulingPaused) return; // scheduling is already paused: nothing to drain
+      this._onExitTask = this.createSpeechTask({
+        taskFn: () => import_telemetry.tracer.startActiveSpan(async () => this.agent.onExit(), {
+          name: "on_exit",
+          attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
+        }),
+        inlineTask: true,
         name: "AgentActivity_onExit"
       });
-      this.wakeupMainTask();
-      this._draining = true;
-      await ((_a = this._mainTask) == null ? void 0 : _a.result);
+      this.cancelPreemptiveGeneration();
+      await this._onExitTask.result; // onExit completes before scheduling is paused
+      await this._pauseSchedulingTask([]); // no blocked tasks are passed
     } finally {
       unlock();
     }
   }
   async close() { // Closes the activity while holding this.lock: cancels preemptive generation, releases session resources, stops the main task, and clears the agent's reference to this activity.
-    var _a, _b, _c, _d;
     const unlock = await this.lock.lock();
     try {
-      if (!this._draining) {
-        this.logger.warn("task closing without draining");
-      }
       this.cancelPreemptiveGeneration();
-      if (this.llm instanceof import_llm.LLM) {
-        this.llm.off("metrics_collected", this.onMetricsCollected);
-      }
-      if (this.realtimeSession) {
-        this.realtimeSession.off("generation_created", this.onGenerationCreated);
-        this.realtimeSession.off("input_speech_started", this.onInputSpeechStarted);
-        this.realtimeSession.off("input_speech_stopped", this.onInputSpeechStopped);
-        this.realtimeSession.off(
-          "input_audio_transcription_completed",
-          this.onInputAudioTranscriptionCompleted
-        );
-        this.realtimeSession.off("metrics_collected", this.onMetricsCollected);
-      }
-      if (this.stt instanceof import_stt.STT) {
-        this.stt.off("metrics_collected", this.onMetricsCollected);
-      }
-      if (this.tts instanceof import_tts.TTS) {
-        this.tts.off("metrics_collected", this.onMetricsCollected);
+      await this._closeSessionResources(); // listener removal and session teardown are delegated to this helper
+      if (this._mainTask) {
+        await this._mainTask.cancelAndWait();
       }
-      if (this.vad instanceof import_vad.VAD) {
-        this.vad.off("metrics_collected", this.onMetricsCollected);
-      }
-      this.detachAudioInput();
-      (_a = this.realtimeSpans) == null ? void 0 : _a.clear();
-      await ((_b = this.realtimeSession) == null ? void 0 : _b.close());
-      await ((_c = this.audioRecognition) == null ? void 0 : _c.close());
-      await ((_d = this._mainTask) == null ? void 0 : _d.cancelAndWait());
+      this.agent._agentActivity = void 0; // clear the agent's reference to this activity
     } finally {
       unlock();
     }
   }
+  async _closeSessionResources() { // Removes this activity's listeners from the models, detaches audio input, and closes the realtime session and audio recognition.
+    var _a, _b, _c;
+    if (this.llm instanceof import_llm.LLM) {
+      this.llm.off("metrics_collected", this.onMetricsCollected);
+      this.llm.off("error", this.onModelError);
+    }
+    if (this.realtimeSession) {
+      this.realtimeSession.off("generation_created", this.onRealtimeGenerationCreated);
+      this.realtimeSession.off("input_speech_started", this.onRealtimeInputSpeechStarted);
+      this.realtimeSession.off("input_speech_stopped", this.onRealtimeInputSpeechStopped);
+      this.realtimeSession.off(
+        "input_audio_transcription_completed",
+        this.onRealtimeInputAudioTranscriptionCompleted
+      );
+      this.realtimeSession.off("metrics_collected", this.onMetricsCollected);
+      this.realtimeSession.off("error", this.onModelError);
+    }
+    if (this.stt instanceof import_stt.STT) {
+      this.stt.off("metrics_collected", this.onMetricsCollected);
+      this.stt.off("error", this.onModelError);
+    }
+    if (this.tts instanceof import_tts.TTS) {
+      this.tts.off("metrics_collected", this.onMetricsCollected);
+      this.tts.off("error", this.onModelError);
+    }
+    if (this.vad instanceof import_vad.VAD) { // only "metrics_collected" is removed for the VAD; no "error" listener is removed here
+      this.vad.off("metrics_collected", this.onMetricsCollected);
+    }
+    this.detachAudioInput();
+    (_a = this.realtimeSpans) == null ? void 0 : _a.clear();
+    await ((_b = this.realtimeSession) == null ? void 0 : _b.close()); // each close is skipped when the object is null or undefined
+    await ((_c = this.audioRecognition) == null ? void 0 : _c.close());
+    this.realtimeSession = void 0; // drop both references once closed
+    this.audioRecognition = void 0;
+  }
 }
 function toOaiToolChoice(toolChoice) { // Maps null to undefined; any other value (including undefined) is returned unchanged.
   return toolChoice !== null ? toolChoice : void 0;
 }
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = { // "0 &&" short-circuits, so this assignment never executes at runtime
-  AgentActivity
+  AgentActivity,
+  agentActivityStorage
 });
 //# sourceMappingURL=agent_activity.cjs.map