@livekit/agents 1.0.45 → 1.0.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stream/index.cjs +3 -0
- package/dist/stream/index.cjs.map +1 -1
- package/dist/stream/index.d.cts +1 -0
- package/dist/stream/index.d.ts +1 -0
- package/dist/stream/index.d.ts.map +1 -1
- package/dist/stream/index.js +2 -0
- package/dist/stream/index.js.map +1 -1
- package/dist/stream/multi_input_stream.cjs +139 -0
- package/dist/stream/multi_input_stream.cjs.map +1 -0
- package/dist/stream/multi_input_stream.d.cts +55 -0
- package/dist/stream/multi_input_stream.d.ts +55 -0
- package/dist/stream/multi_input_stream.d.ts.map +1 -0
- package/dist/stream/multi_input_stream.js +115 -0
- package/dist/stream/multi_input_stream.js.map +1 -0
- package/dist/stream/multi_input_stream.test.cjs +340 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -0
- package/dist/stream/multi_input_stream.test.js +339 -0
- package/dist/stream/multi_input_stream.test.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +42 -0
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +14 -0
- package/dist/telemetry/trace_types.d.ts +14 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +28 -0
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/utils.cjs +13 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +35 -10
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +1 -0
- package/dist/voice/agent_activity.d.ts +1 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +35 -10
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +19 -7
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +3 -2
- package/dist/voice/agent_session.d.ts +3 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +19 -7
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +85 -36
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +22 -1
- package/dist/voice/audio_recognition.d.ts +22 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +89 -36
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +233 -0
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
- package/dist/voice/audio_recognition_span.test.js +232 -0
- package/dist/voice/audio_recognition_span.test.js.map +1 -0
- package/dist/voice/io.cjs +6 -3
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +3 -2
- package/dist/voice/io.d.ts +3 -2
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +6 -3
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +3 -1
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +3 -1
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +17 -17
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +2 -2
- package/dist/voice/room_io/_input.d.ts +2 -2
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +7 -6
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +9 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +9 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/utils.cjs +47 -0
- package/dist/voice/utils.cjs.map +1 -0
- package/dist/voice/utils.d.cts +4 -0
- package/dist/voice/utils.d.ts +4 -0
- package/dist/voice/utils.d.ts.map +1 -0
- package/dist/voice/utils.js +23 -0
- package/dist/voice/utils.js.map +1 -0
- package/package.json +1 -1
- package/src/stream/index.ts +1 -0
- package/src/stream/multi_input_stream.test.ts +540 -0
- package/src/stream/multi_input_stream.ts +172 -0
- package/src/telemetry/trace_types.ts +18 -0
- package/src/utils.ts +16 -0
- package/src/voice/agent_activity.ts +25 -0
- package/src/voice/agent_session.ts +17 -11
- package/src/voice/audio_recognition.ts +114 -38
- package/src/voice/audio_recognition_span.test.ts +261 -0
- package/src/voice/io.ts +7 -4
- package/src/voice/recorder_io/recorder_io.ts +2 -1
- package/src/voice/room_io/_input.ts +11 -7
- package/src/voice/room_io/room_io.ts +12 -0
- package/src/voice/utils.ts +29 -0
package/src/telemetry/trace_types.ts
CHANGED

@@ -20,6 +20,8 @@ export const ATTR_ROOM_NAME = 'lk.room_name';
 export const ATTR_SESSION_OPTIONS = 'lk.session_options';
 
 // assistant turn
+export const ATTR_AGENT_TURN_ID = 'lk.generation_id';
+export const ATTR_AGENT_PARENT_TURN_ID = 'lk.parent_generation_id';
 export const ATTR_USER_INPUT = 'lk.user_input';
 export const ATTR_INSTRUCTIONS = 'lk.instructions';
 export const ATTR_SPEECH_INTERRUPTED = 'lk.interrupted';
@@ -27,10 +29,14 @@ export const ATTR_SPEECH_INTERRUPTED = 'lk.interrupted';
 // llm node
 export const ATTR_CHAT_CTX = 'lk.chat_ctx';
 export const ATTR_FUNCTION_TOOLS = 'lk.function_tools';
+export const ATTR_PROVIDER_TOOLS = 'lk.provider_tools';
+export const ATTR_TOOL_SETS = 'lk.tool_sets';
 export const ATTR_RESPONSE_TEXT = 'lk.response.text';
 export const ATTR_RESPONSE_FUNCTION_CALLS = 'lk.response.function_calls';
+export const ATTR_RESPONSE_TTFT = 'lk.response.ttft';
 
 // function tool
+export const ATTR_FUNCTION_TOOL_ID = 'lk.function_tool.id';
 export const ATTR_FUNCTION_TOOL_NAME = 'lk.function_tool.name';
 export const ATTR_FUNCTION_TOOL_ARGS = 'lk.function_tool.arguments';
 export const ATTR_FUNCTION_TOOL_IS_ERROR = 'lk.function_tool.is_error';
@@ -40,6 +46,7 @@ export const ATTR_FUNCTION_TOOL_OUTPUT = 'lk.function_tool.output';
 export const ATTR_TTS_INPUT_TEXT = 'lk.input_text';
 export const ATTR_TTS_STREAMING = 'lk.tts.streaming';
 export const ATTR_TTS_LABEL = 'lk.tts.label';
+export const ATTR_RESPONSE_TTFB = 'lk.response.ttfb';
 
 // eou detection
 export const ATTR_EOU_PROBABILITY = 'lk.eou.probability';
@@ -56,10 +63,14 @@ export const ATTR_LLM_METRICS = 'lk.llm_metrics';
 export const ATTR_TTS_METRICS = 'lk.tts_metrics';
 export const ATTR_REALTIME_MODEL_METRICS = 'lk.realtime_model_metrics';
 
+// latency span attributes
+export const ATTR_E2E_LATENCY = 'lk.e2e_latency';
+
 // OpenTelemetry GenAI attributes
 // OpenTelemetry specification: https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/
 export const ATTR_GEN_AI_OPERATION_NAME = 'gen_ai.operation.name';
 export const ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model';
+export const ATTR_GEN_AI_PROVIDER_NAME = 'gen_ai.provider.name';
 export const ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens';
 export const ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens';
 
@@ -86,3 +97,10 @@ export const ATTR_EXCEPTION_MESSAGE = 'exception.message';
 
 // Platform-specific attributes
 export const ATTR_LANGFUSE_COMPLETION_START_TIME = 'langfuse.observation.completion_start_time';
+
+// Adaptive Interruption attributes
+export const ATTR_IS_INTERRUPTION = 'lk.is_interruption';
+export const ATTR_INTERRUPTION_PROBABILITY = 'lk.interruption.probability';
+export const ATTR_INTERRUPTION_TOTAL_DURATION = 'lk.interruption.total_duration';
+export const ATTR_INTERRUPTION_PREDICTION_DURATION = 'lk.interruption.prediction_duration';
+export const ATTR_INTERRUPTION_DETECTION_DELAY = 'lk.interruption.detection_delay';
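
These constants are plain string attribute keys. A minimal, hypothetical sketch of how a consumer might set the new adaptive-interruption attributes on an OpenTelemetry span; the key strings are copied from the constants above, but the span name, values, and units are assumptions, since this diff does not show any call sites:

import { trace } from '@opentelemetry/api';

// Hypothetical usage; attribute keys match the constants added above.
const span = trace.getTracer('example').startSpan('user_turn');
span.setAttribute('lk.is_interruption', true);
span.setAttribute('lk.interruption.probability', 0.92);
span.setAttribute('lk.interruption.total_duration', 350); // unit not stated in the diff
span.end();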
package/src/utils.ts
CHANGED
@@ -126,6 +126,8 @@ export class Future<T = void> {
   #rejectPromise!: (error: Error) => void;
   #done: boolean = false;
   #rejected: boolean = false;
+  #result: T | undefined = undefined;
+  #error: Error | undefined = undefined;
 
   constructor() {
     this.#await = new Promise<T>((resolve, reject) => {
@@ -142,6 +144,18 @@ export class Future<T = void> {
     return this.#done;
   }
 
+  get result(): T {
+    if (!this.#done) {
+      throw new Error('Future is not done');
+    }
+
+    if (this.#rejected) {
+      throw this.#error;
+    }
+
+    return this.#result!;
+  }
+
   /** Whether the future was rejected (cancelled) */
   get rejected() {
     return this.#rejected;
@@ -149,12 +163,14 @@ export class Future<T = void> {
 
   resolve(value: T) {
     this.#done = true;
+    this.#result = value;
     this.#resolvePromise(value);
   }
 
   reject(error: Error) {
     this.#done = true;
    this.#rejected = true;
+    this.#error = error;
    this.#rejectPromise(error);
   }
 }
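
The new result getter gives synchronous access to a settled Future: it throws if the future is still pending, rethrows the stored error if it was rejected, and otherwise returns the stored value (the non-null assertion on #result is safe because #done is checked first). A short usage sketch; the surrounding code is illustrative:

const fut = new Future<number>();
fut.resolve(42);
console.log(fut.done, fut.result); // true 42, no await needed

const failed = new Future<number>();
failed.reject(new Error('boom'));
try {
  failed.result; // rethrows the stored rejection
} catch (err) {
  console.error((err as Error).message); // 'boom'
}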
package/src/voice/agent_activity.ts
CHANGED

@@ -74,6 +74,7 @@ import {
 } from './generation.js';
 import type { TimedString } from './io.js';
 import { SpeechHandle } from './speech_handle.js';
+import { setParticipantSpanAttributes } from './utils.js';
 
 const speechHandleStorage = new AsyncLocalStorage<SpeechHandle>();
 
@@ -299,6 +300,9 @@ export class AgentActivity implements RecognitionHooks {
       minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
       maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
       rootSpanContext: this.agentSession.rootSpanContext,
+      sttModel: this.stt?.label,
+      sttProvider: this.getSttProvider(),
+      getLinkedParticipant: () => this.agentSession._roomIO?.linkedParticipant,
     });
     this.audioRecognition.start();
     this.started = true;
@@ -335,6 +339,17 @@ export class AgentActivity implements RecognitionHooks {
     return this.agent.stt || this.agentSession.stt;
   }
 
+  private getSttProvider(): string | undefined {
+    const label = this.stt?.label;
+    if (!label) {
+      return undefined;
+    }
+
+    // Heuristic: most labels look like "<provider>-<model>"
+    const [provider] = label.split('-', 1);
+    return provider || label;
+  }
+
   get llm(): LLM | RealtimeModel | undefined {
     return this.agent.llm || this.agentSession.llm;
   }
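
Note how the limit argument to split, which the getSttProvider heuristic above leans on, behaves; the labels below are made up for illustration:

'deepgram-nova-2'.split('-', 1); // ['deepgram'] -> provider 'deepgram'
'openai'.split('-', 1);          // ['openai']   -> no separator, whole label returned
'-odd'.split('-', 1);            // ['']         -> falsy, `provider || label` falls back to the full label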
@@ -1355,6 +1370,11 @@ export class AgentActivity implements RecognitionHooks {
       span.setAttribute(traceTypes.ATTR_USER_INPUT, newMessage.textContent || '');
     }
 
+    const localParticipant = this.agentSession._roomIO?.localParticipant;
+    if (localParticipant) {
+      setParticipantSpanAttributes(span, localParticipant);
+    }
+
     speechHandleStorage.enterWith(speechHandle);
 
     const audioOutput = this.agentSession.output.audioEnabled
@@ -1815,6 +1835,11 @@ export class AgentActivity implements RecognitionHooks {
 
     span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
 
+    const localParticipant = this.agentSession._roomIO?.localParticipant;
+    if (localParticipant) {
+      setParticipantSpanAttributes(span, localParticipant);
+    }
+
     speechHandleStorage.enterWith(speechHandle);
 
     if (!this.realtimeSession) {
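
setParticipantSpanAttributes comes from the new package/src/voice/utils.ts (+29 lines), which this diff view does not expand. Based on the ParticipantLike shape introduced in audio_recognition.ts below, a plausible sketch is a small helper that copies participant identity fields onto a span; the attribute keys here are invented for illustration and are not the package's actual keys:

import type { Span } from '@opentelemetry/api';
import type { ParticipantKind } from '@livekit/rtc-node';

interface ParticipantLike {
  sid: string | undefined;
  identity: string;
  kind: ParticipantKind;
}

// Hypothetical implementation; the real one lives in voice/utils.ts (not shown).
function setParticipantSpanAttributes(span: Span, p: ParticipantLike): void {
  if (p.sid) span.setAttribute('lk.participant.sid', p.sid);
  span.setAttribute('lk.participant.identity', p.identity);
  span.setAttribute('lk.participant.kind', String(p.kind));
}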
package/src/voice/agent_session.ts
CHANGED

@@ -62,6 +62,7 @@ import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io
 import type { UnknownUserData } from './run_context.js';
 import type { SpeechHandle } from './speech_handle.js';
 import { RunResult } from './testing/run_result.js';
+import { setParticipantSpanAttributes } from './utils.js';
 
 export interface VoiceOptions {
   allowInterruptions: boolean;
@@ -131,7 +132,8 @@ export class AgentSession<
   private started = false;
   private userState: UserState = 'listening';
 
-  private roomIO?: RoomIO;
+  /** @internal */
+  _roomIO?: RoomIO;
   private logger = log();
 
   private _chatCtx: ChatContext;
@@ -294,7 +296,7 @@ export class AgentSession<
 
     const tasks: Promise<void>[] = [];
 
-    if (room && !this.roomIO) {
+    if (room && !this._roomIO) {
       // Check for existing input/output configuration and warn if needed
       if (this.input.audio && inputOptions?.audioEnabled !== false) {
         this.logger.warn(
@@ -314,13 +316,13 @@ export class AgentSession<
         );
       }
 
-      this.roomIO = new RoomIO({
+      this._roomIO = new RoomIO({
        agentSession: this,
        room,
        inputOptions,
        outputOptions,
      });
-      this.roomIO.start();
+      this._roomIO.start();
     }
 
     let ctx: JobContext | undefined = undefined;
@@ -700,8 +702,10 @@ export class AgentSession<
          startTime: options?.startTime,
        });
 
-
-
+        const localParticipant = this._roomIO?.localParticipant;
+        if (localParticipant) {
+          setParticipantSpanAttributes(this.agentSpeakingSpan, localParticipant);
+        }
       }
     } else if (this.agentSpeakingSpan !== undefined) {
       // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
@@ -738,8 +742,10 @@ export class AgentSession<
          startTime: lastSpeakingTime,
        });
 
-
-
+        const linked = this._roomIO?.linkedParticipant;
+        if (linked) {
+          setParticipantSpanAttributes(this.userSpeakingSpan, linked);
+        }
     } else if (this.userSpeakingSpan !== undefined) {
       this.userSpeakingSpan.end(lastSpeakingTime);
       this.userSpeakingSpan = undefined;
@@ -783,7 +789,7 @@ export class AgentSession<
       return;
     }
 
-    if (this.roomIO && !this.roomIO.isParticipantAvailable) {
+    if (this._roomIO && !this._roomIO.isParticipantAvailable) {
       return;
     }
 
@@ -862,8 +868,8 @@ export class AgentSession<
     this.output.audio = null;
     this.output.transcription = null;
 
-    await this.roomIO?.close();
-    this.roomIO = undefined;
+    await this._roomIO?.close();
+    this._roomIO = undefined;
 
     await this.activity?.close();
     this.activity = undefined;
package/src/voice/audio_recognition.ts
CHANGED

@@ -1,8 +1,15 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
+import type { ParticipantKind } from '@livekit/rtc-node';
 import { AudioFrame } from '@livekit/rtc-node';
-import type { Context } from '@opentelemetry/api';
+import {
+  type Context,
+  ROOT_CONTEXT,
+  type Span,
+  context as otelContext,
+  trace,
+} from '@opentelemetry/api';
 import type { WritableStreamDefaultWriter } from 'node:stream/web';
 import { ReadableStream } from 'node:stream/web';
 import { type ChatContext } from '../llm/chat_context.js';
@@ -16,6 +23,7 @@ import { Task, delay } from '../utils.js';
 import { type VAD, type VADEvent, VADEventType } from '../vad.js';
 import type { TurnDetectionMode } from './agent_session.js';
 import type { STTNode } from './io.js';
+import { setParticipantSpanAttributes } from './utils.js';
 
 export interface EndOfTurnInfo {
   /** The new transcript text from the user's speech. */
@@ -72,6 +80,22 @@ export interface AudioRecognitionOptions {
   maxEndpointingDelay: number;
   /** Root span context for tracing. */
   rootSpanContext?: Context;
+  /** STT model name for tracing */
+  sttModel?: string;
+  /** STT provider name for tracing */
+  sttProvider?: string;
+  /** Getter for linked participant for span attribution */
+  getLinkedParticipant?: () => ParticipantLike | undefined;
+}
+
+/**
+ * Minimal participant shape for span attribution.
+ * Compatible with both `LocalParticipant` and `RemoteParticipant` from `@livekit/rtc-node`.
+ */
+export interface ParticipantLike {
+  sid: string | undefined;
+  identity: string;
+  kind: ParticipantKind;
 }
 
 export class AudioRecognition {
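
Since ParticipantLike is structural, any object with those three fields satisfies it, which is handy in tests. A hypothetical illustration; the values and the enum member name are assumptions:

import { ParticipantKind } from '@livekit/rtc-node';

const fakeParticipant: ParticipantLike = {
  sid: 'PA_test123',              // made-up sid
  identity: 'user-42',
  kind: ParticipantKind.STANDARD, // enum member name assumed
};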
@@ -84,6 +108,9 @@ export class AudioRecognition {
   private maxEndpointingDelay: number;
   private lastLanguage?: string;
   private rootSpanContext?: Context;
+  private sttModel?: string;
+  private sttProvider?: string;
+  private getLinkedParticipant?: () => ParticipantLike | undefined;
 
   private deferredInputStream: DeferredReadableStream<AudioFrame>;
   private logger = log();
@@ -121,6 +148,9 @@ export class AudioRecognition {
     this.maxEndpointingDelay = opts.maxEndpointingDelay;
     this.lastLanguage = undefined;
     this.rootSpanContext = opts.rootSpanContext;
+    this.sttModel = opts.sttModel;
+    this.sttProvider = opts.sttProvider;
+    this.getLinkedParticipant = opts.getLinkedParticipant;
 
     this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
     const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
@@ -151,6 +181,37 @@ export class AudioRecognition {
     });
   }
 
+  private ensureUserTurnSpan(startTime?: number): Span {
+    if (this.userTurnSpan && this.userTurnSpan.isRecording()) {
+      return this.userTurnSpan;
+    }
+
+    this.userTurnSpan = tracer.startSpan({
+      name: 'user_turn',
+      context: this.rootSpanContext,
+      startTime,
+    });
+
+    const participant = this.getLinkedParticipant?.();
+    if (participant) {
+      setParticipantSpanAttributes(this.userTurnSpan, participant);
+    }
+
+    if (this.sttModel) {
+      this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.sttModel);
+    }
+    if (this.sttProvider) {
+      this.userTurnSpan.setAttribute(traceTypes.ATTR_GEN_AI_PROVIDER_NAME, this.sttProvider);
+    }
+
+    return this.userTurnSpan;
+  }
+
+  private userTurnContext(span: Span): Context {
+    const base = this.rootSpanContext ?? ROOT_CONTEXT;
+    return trace.setSpan(base, span);
+  }
+
   private async onSTTEvent(ev: SpeechEvent) {
     if (
       this.turnDetectionMode === 'manual' &&
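
userTurnContext is standard OpenTelemetry context plumbing: trace.setSpan returns a new immutable Context that carries the span, and context.with later activates it around a callback, so spans started inside the callback become children of user_turn. A self-contained sketch of the same pattern using the raw OpenTelemetry API (the package's internal tracer wraps startSpan with an options-object signature; the tracer name below is illustrative):

import { ROOT_CONTEXT, context, trace } from '@opentelemetry/api';

const tracer = trace.getTracer('example');
const userTurn = tracer.startSpan('user_turn');

// Derive a context that carries `userTurn`, then run work under it.
const ctx = trace.setSpan(ROOT_CONTEXT, userTurn);
context.with(ctx, () => {
  // Started under ctx, so it is parented to user_turn.
  tracer.startSpan('eou_detection').end();
});
userTurn.end();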
@@ -299,19 +360,25 @@ export class AudioRecognition {
         break;
       case SpeechEventType.START_OF_SPEECH:
         if (this.turnDetectionMode !== 'stt') break;
-        this.hooks.onStartOfSpeech({
-          type: VADEventType.START_OF_SPEECH,
-          samplesIndex: 0,
-          timestamp: Date.now(),
-          speechDuration: 0,
-          silenceDuration: 0,
-          frames: [],
-          probability: 0,
-          inferenceDuration: 0,
-          speaking: true,
-          rawAccumulatedSilence: 0,
-          rawAccumulatedSpeech: 0,
-        });
+        {
+          const span = this.ensureUserTurnSpan(Date.now());
+          const ctx = this.userTurnContext(span);
+          otelContext.with(ctx, () => {
+            this.hooks.onStartOfSpeech({
+              type: VADEventType.START_OF_SPEECH,
+              samplesIndex: 0,
+              timestamp: Date.now(),
+              speechDuration: 0,
+              silenceDuration: 0,
+              frames: [],
+              probability: 0,
+              inferenceDuration: 0,
+              speaking: true,
+              rawAccumulatedSilence: 0,
+              rawAccumulatedSpeech: 0,
+            });
+          });
+        }
         this.speaking = true;
         this.lastSpeakingTime = Date.now();
 
@@ -319,19 +386,25 @@ export class AudioRecognition {
         break;
       case SpeechEventType.END_OF_SPEECH:
         if (this.turnDetectionMode !== 'stt') break;
-        this.hooks.onEndOfSpeech({
-          type: VADEventType.END_OF_SPEECH,
-          samplesIndex: 0,
-          timestamp: Date.now(),
-          speechDuration: 0,
-          silenceDuration: 0,
-          frames: [],
-          probability: 0,
-          inferenceDuration: 0,
-          speaking: false,
-          rawAccumulatedSilence: 0,
-          rawAccumulatedSpeech: 0,
-        });
+        {
+          const span = this.ensureUserTurnSpan();
+          const ctx = this.userTurnContext(span);
+          otelContext.with(ctx, () => {
+            this.hooks.onEndOfSpeech({
+              type: VADEventType.END_OF_SPEECH,
+              samplesIndex: 0,
+              timestamp: Date.now(),
+              speechDuration: 0,
+              silenceDuration: 0,
+              frames: [],
+              probability: 0,
+              inferenceDuration: 0,
+              speaking: false,
+              rawAccumulatedSilence: 0,
+              rawAccumulatedSpeech: 0,
+            });
+          });
+        }
         this.speaking = false;
         this.userTurnCommitted = true;
         this.lastSpeakingTime = Date.now();
@@ -376,6 +449,9 @@ export class AudioRecognition {
       async (controller: AbortController) => {
         let endpointingDelay = this.minEndpointingDelay;
 
+        const userTurnSpan = this.ensureUserTurnSpan();
+        const userTurnCtx = this.userTurnContext(userTurnSpan);
+
         if (turnDetector) {
           await tracer.startActiveSpan(
             async (span) => {
@@ -415,7 +491,7 @@ export class AudioRecognition {
            },
            {
              name: 'eou_detection',
-              context: this.rootSpanContext,
+              context: userTurnCtx,
            },
          );
        }
@@ -577,17 +653,13 @@ export class AudioRecognition {
      switch (ev.type) {
        case VADEventType.START_OF_SPEECH:
          this.logger.debug('VAD task: START_OF_SPEECH');
-          this.hooks.onStartOfSpeech(ev);
-          this.speaking = true;
-
-          if (!this.userTurnSpan) {
+          {
            const startTime = Date.now() - ev.speechDuration;
-            this.userTurnSpan = tracer.startSpan({
-              name: 'user_turn',
-              context: this.rootSpanContext,
-              startTime,
-            });
+            const span = this.ensureUserTurnSpan(startTime);
+            const ctx = this.userTurnContext(span);
+            otelContext.with(ctx, () => this.hooks.onStartOfSpeech(ev));
          }
+          this.speaking = true;
 
          // Capture sample rate from the first VAD event if not already set
          if (ev.frames.length > 0 && ev.frames[0]) {
@@ -609,7 +681,11 @@ export class AudioRecognition {
          break;
        case VADEventType.END_OF_SPEECH:
          this.logger.debug('VAD task: END_OF_SPEECH');
-          this.hooks.onEndOfSpeech(ev);
+          {
+            const span = this.ensureUserTurnSpan();
+            const ctx = this.userTurnContext(span);
+            otelContext.with(ctx, () => this.hooks.onEndOfSpeech(ev));
+          }
 
          // when VAD fires END_OF_SPEECH, it already waited for the silence_duration
          this.speaking = false;