@livekit/agents 1.0.36-dev.0 → 1.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +0 -1
- package/dist/index.d.ts +0 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -1
- package/dist/index.js.map +1 -1
- package/dist/inference/utils.cjs +2 -15
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +0 -1
- package/dist/inference/utils.d.ts +0 -1
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +1 -13
- package/dist/inference/utils.js.map +1 -1
- package/dist/stream/stream_channel.cjs +0 -3
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +2 -3
- package/dist/stream/stream_channel.d.ts +2 -3
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +0 -3
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +0 -15
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +0 -5
- package/dist/telemetry/trace_types.d.ts +0 -5
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +0 -10
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/voice/agent_activity.cjs +19 -68
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +0 -14
- package/dist/voice/agent_activity.d.ts +0 -14
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +19 -68
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +65 -37
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +25 -4
- package/dist/voice/agent_session.d.ts +25 -4
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +65 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +2 -124
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +1 -32
- package/dist/voice/audio_recognition.d.ts +1 -32
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +2 -127
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/index.cjs +14 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +3 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +1 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +1 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +12 -3
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +12 -2
- package/dist/voice/speech_handle.d.ts +12 -2
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +10 -2
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/index.cjs +54 -0
- package/dist/voice/testing/index.cjs.map +1 -0
- package/dist/voice/testing/index.d.cts +20 -0
- package/dist/voice/testing/index.d.ts +20 -0
- package/dist/voice/testing/index.d.ts.map +1 -0
- package/dist/voice/testing/index.js +33 -0
- package/dist/voice/testing/index.js.map +1 -0
- package/dist/voice/testing/run_result.cjs +766 -0
- package/dist/voice/testing/run_result.cjs.map +1 -0
- package/dist/voice/testing/run_result.d.cts +374 -0
- package/dist/voice/testing/run_result.d.ts +374 -0
- package/dist/voice/testing/run_result.d.ts.map +1 -0
- package/dist/voice/testing/run_result.js +739 -0
- package/dist/voice/testing/run_result.js.map +1 -0
- package/dist/{inference/interruption/index.cjs → voice/testing/types.cjs} +24 -12
- package/dist/voice/testing/types.cjs.map +1 -0
- package/dist/voice/testing/types.d.cts +83 -0
- package/dist/voice/testing/types.d.ts +83 -0
- package/dist/voice/testing/types.d.ts.map +1 -0
- package/dist/voice/testing/types.js +19 -0
- package/dist/voice/testing/types.js.map +1 -0
- package/package.json +3 -4
- package/src/index.ts +0 -2
- package/src/inference/utils.ts +0 -15
- package/src/stream/stream_channel.ts +2 -6
- package/src/telemetry/trace_types.ts +0 -7
- package/src/voice/agent_activity.ts +24 -83
- package/src/voice/agent_session.ts +74 -49
- package/src/voice/audio_recognition.ts +1 -161
- package/src/voice/index.ts +1 -0
- package/src/voice/room_io/room_io.ts +1 -0
- package/src/voice/speech_handle.ts +24 -4
- package/src/voice/testing/index.ts +50 -0
- package/src/voice/testing/run_result.ts +937 -0
- package/src/voice/testing/types.ts +118 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +0 -152
- package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +0 -1
- package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +0 -50
- package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +0 -50
- package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +0 -1
- package/dist/inference/interruption/AdaptiveInterruptionDetector.js +0 -125
- package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +0 -1
- package/dist/inference/interruption/InterruptionStream.cjs +0 -310
- package/dist/inference/interruption/InterruptionStream.cjs.map +0 -1
- package/dist/inference/interruption/InterruptionStream.d.cts +0 -57
- package/dist/inference/interruption/InterruptionStream.d.ts +0 -57
- package/dist/inference/interruption/InterruptionStream.d.ts.map +0 -1
- package/dist/inference/interruption/InterruptionStream.js +0 -288
- package/dist/inference/interruption/InterruptionStream.js.map +0 -1
- package/dist/inference/interruption/defaults.cjs +0 -76
- package/dist/inference/interruption/defaults.cjs.map +0 -1
- package/dist/inference/interruption/defaults.d.cts +0 -14
- package/dist/inference/interruption/defaults.d.ts +0 -14
- package/dist/inference/interruption/defaults.d.ts.map +0 -1
- package/dist/inference/interruption/defaults.js +0 -42
- package/dist/inference/interruption/defaults.js.map +0 -1
- package/dist/inference/interruption/errors.cjs +0 -2
- package/dist/inference/interruption/errors.cjs.map +0 -1
- package/dist/inference/interruption/errors.d.cts +0 -2
- package/dist/inference/interruption/errors.d.ts +0 -2
- package/dist/inference/interruption/errors.d.ts.map +0 -1
- package/dist/inference/interruption/errors.js +0 -1
- package/dist/inference/interruption/errors.js.map +0 -1
- package/dist/inference/interruption/http_transport.cjs +0 -57
- package/dist/inference/interruption/http_transport.cjs.map +0 -1
- package/dist/inference/interruption/http_transport.d.cts +0 -23
- package/dist/inference/interruption/http_transport.d.ts +0 -23
- package/dist/inference/interruption/http_transport.d.ts.map +0 -1
- package/dist/inference/interruption/http_transport.js +0 -33
- package/dist/inference/interruption/http_transport.js.map +0 -1
- package/dist/inference/interruption/index.cjs.map +0 -1
- package/dist/inference/interruption/index.d.cts +0 -5
- package/dist/inference/interruption/index.d.ts +0 -5
- package/dist/inference/interruption/index.d.ts.map +0 -1
- package/dist/inference/interruption/index.js +0 -7
- package/dist/inference/interruption/index.js.map +0 -1
- package/dist/inference/interruption/interruption.cjs +0 -85
- package/dist/inference/interruption/interruption.cjs.map +0 -1
- package/dist/inference/interruption/interruption.d.cts +0 -48
- package/dist/inference/interruption/interruption.d.ts +0 -48
- package/dist/inference/interruption/interruption.d.ts.map +0 -1
- package/dist/inference/interruption/interruption.js +0 -59
- package/dist/inference/interruption/interruption.js.map +0 -1
- package/dist/inference/utils.test.cjs +0 -20
- package/dist/inference/utils.test.cjs.map +0 -1
- package/dist/inference/utils.test.js +0 -19
- package/dist/inference/utils.test.js.map +0 -1
- package/dist/utils/ws_transport.cjs +0 -51
- package/dist/utils/ws_transport.cjs.map +0 -1
- package/dist/utils/ws_transport.d.cts +0 -9
- package/dist/utils/ws_transport.d.ts +0 -9
- package/dist/utils/ws_transport.d.ts.map +0 -1
- package/dist/utils/ws_transport.js +0 -17
- package/dist/utils/ws_transport.js.map +0 -1
- package/dist/utils/ws_transport.test.cjs +0 -212
- package/dist/utils/ws_transport.test.cjs.map +0 -1
- package/dist/utils/ws_transport.test.js +0 -211
- package/dist/utils/ws_transport.test.js.map +0 -1
- package/src/inference/interruption/AdaptiveInterruptionDetector.ts +0 -166
- package/src/inference/interruption/InterruptionStream.ts +0 -397
- package/src/inference/interruption/defaults.ts +0 -33
- package/src/inference/interruption/errors.ts +0 -0
- package/src/inference/interruption/http_transport.ts +0 -61
- package/src/inference/interruption/index.ts +0 -4
- package/src/inference/interruption/interruption.ts +0 -88
- package/src/inference/utils.test.ts +0 -31
- package/src/utils/ws_transport.test.ts +0 -282
- package/src/utils/ws_transport.ts +0 -22
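Read together, the file list shows the two themes of this release: the experimental adaptive interruption detector is deleted outright (everything under `inference/interruption/`, plus `utils/ws_transport` and the detector's call sites in `agent_activity`, `agent_session`, and `audio_recognition`), while a new testing surface is added under `voice/testing/` (`RunResult` and its event types, with `voice/index` updated to expose it). The source diffs below cover the three most heavily edited files.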
package/src/voice/agent_activity.ts
CHANGED

@@ -41,8 +41,6 @@ import { recordRealtimeMetrics, traceTypes, tracer } from '../telemetry/index.js';
 import { splitWords } from '../tokenize/basic/word.js';
 import { TTS, type TTSError } from '../tts/tts.js';
 import { Future, Task, cancelAndWait, waitFor } from '../utils.js';
-import type { InterruptionEvent } from '../inference/interruption/interruption.js';
-import { InterruptionEventType } from '../inference/interruption/interruption.js';
 import { VAD, type VADEvent } from '../vad.js';
 import type { Agent, ModelSettings } from './agent.js';
 import { StopResponse, asyncLocalStorage } from './agent.js';

@@ -114,24 +112,6 @@ export class AgentActivity implements RecognitionHooks {
   _mainTask?: Task<void>;
   _userTurnCompletedTask?: Promise<void>;

-  /**
-   * Notify that agent started speaking.
-   * This enables interruption detection in AudioRecognition.
-   * @internal
-   */
-  notifyAgentSpeechStarted(): void {
-    this.audioRecognition?.onStartOfAgentSpeech();
-  }
-
-  /**
-   * Notify that agent stopped speaking.
-   * This disables interruption detection in AudioRecognition.
-   * @internal
-   */
-  notifyAgentSpeechEnded(): void {
-    this.audioRecognition?.onEndOfAgentSpeech();
-  }
-
   constructor(agent: Agent, agentSession: AgentSession) {
     this.agent = agent;
     this.agentSession = agentSession;

@@ -312,7 +292,6 @@ export class AgentActivity implements RecognitionHooks {
       // Disable stt node if stt is not provided
       stt: this.stt ? (...args) => this.agent.sttNode(...args) : undefined,
       vad: this.vad,
-      interruptionDetector: this.agentSession.interruptionDetector,
       turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection,
       turnDetectionMode: this.turnDetectionMode,
       minEndpointingDelay: this.agentSession.options.minEndpointingDelay,

@@ -718,46 +697,6 @@ export class AgentActivity implements RecognitionHooks {
     }
   }

-  onInterruption(ev: InterruptionEvent): void {
-    if (ev.type !== InterruptionEventType.INTERRUPTION) {
-      // Only handle actual interruptions, not overlap_speech_ended events
-      return;
-    }
-
-    this.logger.info(
-      {
-        probability: ev.probability,
-        detectionDelay: ev.detectionDelay,
-        totalDuration: ev.totalDuration,
-      },
-      'adaptive interruption detected',
-    );
-
-    // Similar to onVADInferenceDone but triggered by the adaptive interruption detector
-    if (this.turnDetection === 'manual' || this.turnDetection === 'realtime_llm') {
-      return;
-    }
-
-    if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
-      return;
-    }
-
-    this.realtimeSession?.startUserActivity();
-
-    if (
-      this._currentSpeech &&
-      !this._currentSpeech.interrupted &&
-      this._currentSpeech.allowInterruptions
-    ) {
-      this.logger.info(
-        { 'speech id': this._currentSpeech.id },
-        'speech interrupted by adaptive interruption detector',
-      );
-      this.realtimeSession?.interrupt();
-      this._currentSpeech.interrupt();
-    }
-  }
-
   onInterimTranscript(ev: SpeechEvent): void {
     if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
       // skip stt transcription if userTranscription is enabled on the realtime model

@@ -1411,11 +1350,14 @@ export class AgentActivity implements RecognitionHooks {
     );
     tasks.push(llmTask);

-    const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
-
     let ttsTask: Task<void> | null = null;
     let ttsStream: ReadableStream<AudioFrame> | null = null;
+    let llmOutput: ReadableStream<string>;
+
     if (audioOutput) {
+      // Only tee the stream when we need TTS
+      const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
+      llmOutput = textOutput;
       [ttsTask, ttsStream] = performTTSInference(
         (...args) => this.agent.ttsNode(...args),
         ttsTextInput,

@@ -1423,6 +1365,9 @@ export class AgentActivity implements RecognitionHooks {
         replyAbortController,
       );
       tasks.push(ttsTask);
+    } else {
+      // No TTS needed, use the stream directly
+      llmOutput = llmGenData.textStream;
     }

     await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
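The two hunks above restructure `pipelineReplyTask` so the LLM text stream is only split when a TTS consumer exists: `ReadableStream.tee()` produces two linked branches, and a branch that nobody reads buffers every chunk its sibling consumes. A minimal, self-contained sketch of the same pattern, with a hypothetical stream factory and consumers standing in for the package's internals:

```typescript
import { ReadableStream } from 'node:stream/web';

// Hypothetical stand-in for llmGenData.textStream.
function makeTextStream(chunks: string[]): ReadableStream<string> {
  return new ReadableStream<string>({
    start(controller) {
      for (const c of chunks) controller.enqueue(c);
      controller.close();
    },
  });
}

// Drain a branch, logging each chunk it sees.
async function drain(label: string, stream: ReadableStream<string>) {
  for await (const chunk of stream) console.log(label, chunk);
}

const audioOutput = false; // pretend this is a text-only session
const textStream = makeTextStream(['Hello', ', ', 'world']);

let llmOutput: ReadableStream<string>;
if (audioOutput) {
  // tee() yields two linked branches; both must be read, otherwise the
  // undrained branch buffers every chunk its sibling consumes.
  const [ttsInput, textOutput] = textStream.tee();
  void drain('tts', ttsInput);
  llmOutput = textOutput;
} else {
  // No TTS consumer: pass the original stream through, skipping the idle branch.
  llmOutput = textStream;
}
await drain('text', llmOutput);
```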
@@ -1482,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
     //TODO(AJS-272): before executing tools, make sure we generated all the text
     // (this ensure everything is kept ordered)

-    const onToolExecutionStarted = (
-
+    const onToolExecutionStarted = (f: FunctionCall) => {
+      speechHandle._itemAdded([f]);
+      this.agent._chatCtx.items.push(f);
+      this.agentSession._toolItemsAdded([f]);
     };

-    const onToolExecutionCompleted = (
-
+    const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
+      if (out.toolCallOutput) {
+        speechHandle._itemAdded([out.toolCallOutput]);
+      }
     };

     const [executeToolsTask, toolOutput] = performToolExecutions({

@@ -1562,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
       });
       chatCtx.insert(message);
       this.agent._chatCtx.insert(message);
+      speechHandle._itemAdded([message]);
       this.agentSession._conversationItemAdded(message);
     }

@@ -1589,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
       });
       chatCtx.insert(message);
       this.agent._chatCtx.insert(message);
+      speechHandle._itemAdded([message]);
       this.agentSession._conversationItemAdded(message);
       this.logger.info(
         { speech_id: speechHandle.id, message: textOut.text },

@@ -1673,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
     if (shouldGenerateToolReply) {
       chatCtx.insert(toolMessages);

-
-
-        stepIndex: speechHandle._stepIndex + 1,
-        parent: speechHandle,
-      });
-      this.agentSession.emit(
-        AgentSessionEventTypes.SpeechCreated,
-        createSpeechCreatedEvent({
-          userInitiated: false,
-          source: 'tool_response',
-          speechHandle: handle,
-        }),
-      );
+      // Increment step count on SAME handle (parity with Python agent_activity.py L2081)
+      speechHandle._numSteps += 1;

       // Avoid setting tool_choice to "required" or a specific function when
       // passing tool response back to the LLM
       const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';

+      // Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
       const toolResponseTask = this.createSpeechTask({
         task: Task.from(() =>
           this.pipelineReplyTask(
-
+            speechHandle,
             chatCtx,
             toolCtx,
             { toolChoice: respondToolChoice },

@@ -1704,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
             toolMessages,
           ),
         ),
-        ownedSpeechHandle:
+        ownedSpeechHandle: speechHandle,
         name: 'AgentActivity.pipelineReply',
       });

       toolResponseTask.finally(() => this.onPipelineReplyDone());

-      this.scheduleSpeech(
+      this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
     } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
       for (const msg of toolMessages) {
         msg.createdAt = replyStartedAt;
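The net effect of the tool-reply hunks above: instead of creating a child `SpeechHandle` for each tool response (with its own `SpeechCreated` event), the same handle is reused, its `_numSteps` counter is incremented, and it is passed directly to `pipelineReplyTask` and `scheduleSpeech`, matching the Python `agent_activity.py` behavior the inline comments cite.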
package/src/voice/agent_session.ts
CHANGED

@@ -15,7 +15,6 @@ import {
   type STTModelString,
   type TTSModelString,
 } from '../inference/index.js';
-import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
 import { type JobContext, getJobContext } from '../job.js';
 import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
 import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';

@@ -62,6 +61,7 @@ import { RecorderIO } from './recorder_io/index.js';
 import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
 import type { UnknownUserData } from './run_context.js';
 import type { SpeechHandle } from './speech_handle.js';
+import { RunResult } from './testing/run_result.js';

 export interface VoiceOptions {
   allowInterruptions: boolean;

@@ -107,7 +107,6 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
   vad?: VAD;
   llm?: LLM | RealtimeModel | LLMModels;
   tts?: TTS | TTSModelString;
-  interruptionDetector?: AdaptiveInterruptionDetector;
   userData?: UserData;
   voiceOptions?: Partial<VoiceOptions>;
   connOptions?: SessionConnectOptions;

@@ -169,7 +168,8 @@ export class AgentSession<
   /** @internal - Timestamp when the session started (milliseconds) */
   _startedAt?: number;

-
+  /** @internal - Current run state for testing */
+  _globalRunState?: RunResult;

   constructor(opts: AgentSessionOptions<UserData>) {
     super();

@@ -180,7 +180,6 @@ export class AgentSession<
       llm,
       tts,
       turnDetection,
-      interruptionDetector,
       userData,
       voiceOptions = defaultVoiceOptions,
       connOptions,

@@ -217,7 +216,6 @@ export class AgentSession<
     }

     this.turnDetection = turnDetection;
-    this.interruptionDetector = interruptionDetector;
     this._userData = userData;

     // configurable IO

@@ -278,7 +276,7 @@ export class AgentSession<
     span,
   }: {
     agent: Agent;
-    room
+    room?: Room;
     inputOptions?: Partial<RoomInputOptions>;
     outputOptions?: Partial<RoomOutputOptions>;
     span: Span;

@@ -289,41 +287,45 @@ export class AgentSession<
     this._updateAgentState('initializing');

     const tasks: Promise<void>[] = [];
-    // Check for existing input/output configuration and warn if needed
-    if (this.input.audio && inputOptions?.audioEnabled !== false) {
-      this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
-    }

-    if (
-
-
-
-
+    if (room && !this.roomIO) {
+      // Check for existing input/output configuration and warn if needed
+      if (this.input.audio && inputOptions?.audioEnabled !== false) {
+        this.logger.warn(
+          'RoomIO audio input is enabled but input.audio is already set, ignoring..',
+        );
+      }

-
-
-
-
-
+      if (this.output.audio && outputOptions?.audioEnabled !== false) {
+        this.logger.warn(
+          'RoomIO audio output is enabled but output.audio is already set, ignoring..',
+        );
+      }

-
-
-
-
-
-
-
+      if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
+        this.logger.warn(
+          'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
+        );
+      }
+
+      this.roomIO = new RoomIO({
+        agentSession: this,
+        room,
+        inputOptions,
+        outputOptions,
+      });
+      this.roomIO.start();
+    }

     let ctx: JobContext | undefined = undefined;
     try {
       ctx = getJobContext();
-    } catch
+    } catch {
       // JobContext is not available in evals
-      this.logger.warn('JobContext is not available');
     }

     if (ctx) {
-      if (ctx.room === room && !room.isConnected) {
+      if (room && ctx.room === room && !room.isConnected) {
         this.logger.debug('Auto-connecting to room via job context');
         tasks.push(ctx.connect());
       }
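With `room` now optional, `RoomIO` is only constructed and started when a room is actually supplied, and a missing `JobContext` is silently tolerated instead of warned about, which is what allows a session to run headless in tests and evals. A sketch of the headless path, assuming the public `start()` options mirror the destructured parameters above and that `agent` and `llm` are configured elsewhere:

```typescript
import { AgentSession } from '@livekit/agents'; // top-level re-export assumed

// `agent` and `llm` are assumed to be set up elsewhere.
const session = new AgentSession({ llm });

// No `room` argument: the RoomIO branch above is skipped entirely,
// so no room I/O is wired up and no auto-connect is attempted.
await session.start({ agent });
```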
@@ -376,7 +378,7 @@ export class AgentSession<
     record,
   }: {
     agent: Agent;
-    room
+    room?: Room;
     inputOptions?: Partial<RoomInputOptions>;
     outputOptions?: Partial<RoomOutputOptions>;
     record?: boolean;

@@ -503,13 +505,50 @@ export class AgentSession<

     // attach to the session span if called outside of the AgentSession
     const activeSpan = trace.getActiveSpan();
+    let handle: SpeechHandle;
     if (!activeSpan && this.rootSpanContext) {
-
+      handle = otelContext.with(this.rootSpanContext, () =>
         doGenerateReply(this.activity!, this.nextActivity),
       );
+    } else {
+      handle = doGenerateReply(this.activity!, this.nextActivity);
     }

-
+    if (this._globalRunState) {
+      this._globalRunState._watchHandle(handle);
+    }
+
+    return handle;
+  }
+
+  /**
+   * Run a test with user input and return a result for assertions.
+   *
+   * This method is primarily used for testing agent behavior without
+   * requiring a real room connection.
+   *
+   * @example
+   * ```typescript
+   * const result = await session.run({ userInput: 'Hello' });
+   * result.expect.nextEvent().isMessage({ role: 'assistant' });
+   * result.expect.noMoreEvents();
+   * ```
+   *
+   * @param options - Run options including user input
+   * @returns A RunResult that resolves when the agent finishes responding
+   *
+   * TODO: Add outputType parameter for typed outputs (parity with Python)
+   */
+  run(options: { userInput: string }): RunResult {
+    if (this._globalRunState && !this._globalRunState.done()) {
+      throw new Error('nested runs are not supported');
+    }
+
+    const runState = new RunResult({ userInput: options.userInput });
+    this._globalRunState = runState;
+    this.generateReply({ userInput: options.userInput });
+
+    return runState;
   }

   private async updateActivity(agent: Agent): Promise<void> {
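`run()` is the entry point for the new `voice/testing` module: it rejects nested runs, wraps the reply in a `RunResult`, and `generateReply()` now reports every `SpeechHandle` it creates to the active run state via `_watchHandle`. Expanding the JSDoc's own example into a slightly fuller sketch (the `session` setup is assumed from the previous snippet; only the assertion calls shown in the JSDoc are used):

```typescript
// Drive one user turn through the agent and wait for the run to settle.
const result = await session.run({ userInput: 'Hello' });

// Assert on the recorded events, in order.
result.expect.nextEvent().isMessage({ role: 'assistant' });
result.expect.noMoreEvents();

// Starting another run while this one is still in flight throws
// 'nested runs are not supported'; await the previous result first.
```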
@@ -643,8 +682,6 @@ export class AgentSession<
       return;
     }

-    const oldState = this._agentState;
-
     if (state === 'speaking') {
       // Reset error counts when agent starts speaking
       this.llmErrorCounts = 0;

@@ -659,25 +696,13 @@ export class AgentSession<
       // TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available
       // (Ref: Python agent_session.py line 1161-1164)
       }
-
-      // Notify AudioRecognition that agent started speaking (for interruption detection)
-      this.activity?.notifyAgentSpeechStarted();
-    } else if (oldState === 'speaking') {
-      // Agent stopped speaking
-      if (this.agentSpeakingSpan !== undefined) {
-        // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
-        this.agentSpeakingSpan.end();
-        this.agentSpeakingSpan = undefined;
-      }
-
-      // Notify AudioRecognition that agent stopped speaking (for interruption detection)
-      this.activity?.notifyAgentSpeechEnded();
     } else if (this.agentSpeakingSpan !== undefined) {
-      //
+      // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
       this.agentSpeakingSpan.end();
       this.agentSpeakingSpan = undefined;
     }

+    const oldState = this._agentState;
     this._agentState = state;

     // Handle user away timer based on state changes
package/src/voice/audio_recognition.ts
CHANGED

@@ -5,12 +5,6 @@ import { AudioFrame } from '@livekit/rtc-node';
 import type { Context, Span } from '@opentelemetry/api';
 import type { WritableStreamDefaultWriter } from 'node:stream/web';
 import { ReadableStream } from 'node:stream/web';
-import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
-import {
-  InterruptionStreamBase,
-  InterruptionStreamSentinel,
-} from '../inference/interruption/InterruptionStream.js';
-import type { InterruptionEvent } from '../inference/interruption/interruption.js';
 import { type ChatContext } from '../llm/chat_context.js';
 import { log } from '../log.js';
 import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';

@@ -45,7 +39,6 @@ export interface RecognitionHooks {
   onFinalTranscript: (ev: SpeechEvent) => void;
   onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;
   onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;
-  onInterruption: (ev: InterruptionEvent) => void;

   retrieveChatCtx: () => ChatContext;
 }

@@ -60,7 +53,6 @@ export interface AudioRecognitionOptions {
   recognitionHooks: RecognitionHooks;
   stt?: STTNode;
   vad?: VAD;
-  interruptionDetector?: AdaptiveInterruptionDetector;
   turnDetector?: _TurnDetector;
   turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
   minEndpointingDelay: number;

@@ -96,7 +88,6 @@ export class AudioRecognition {

   private vadInputStream: ReadableStream<AudioFrame>;
   private sttInputStream: ReadableStream<AudioFrame>;
-  private interruptionInputStream: ReadableStream<AudioFrame>;
   private silenceAudioTransform = new IdentityTransform<AudioFrame>();
   private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;

@@ -105,19 +96,11 @@ export class AudioRecognition {
   private commitUserTurnTask?: Task<void>;
   private vadTask?: Task<void>;
   private sttTask?: Task<void>;
-  private interruptionTask?: Task<void>;
-
-  // interruption detection
-  private interruptionDetector?: AdaptiveInterruptionDetector;
-  private interruptionStream?: InterruptionStreamBase;
-  private interruptionEnabled = false;
-  private agentSpeaking = false;

   constructor(opts: AudioRecognitionOptions) {
     this.hooks = opts.recognitionHooks;
     this.stt = opts.stt;
     this.vad = opts.vad;
-    this.interruptionDetector = opts.interruptionDetector;
     this.turnDetector = opts.turnDetector;
     this.turnDetectionMode = opts.turnDetectionMode;
     this.minEndpointingDelay = opts.minEndpointingDelay;

@@ -125,15 +108,10 @@ export class AudioRecognition {
     this.lastLanguage = undefined;
     this.rootSpanContext = opts.rootSpanContext;

-    // Interruption detection is only enabled if both detector and VAD are provided
-    this.interruptionEnabled = this.interruptionDetector !== undefined && this.vad !== undefined;
-
     this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
-    const [vadInputStream,
-    const [sttInputStream, interruptionInputStream] = rest.tee();
+    const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
     this.vadInputStream = vadInputStream;
     this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);
-    this.interruptionInputStream = interruptionInputStream;
     this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();
   }

@@ -157,15 +135,6 @@ export class AudioRecognition {
     this.sttTask.result.catch((err) => {
       this.logger.error(`Error running STT task: ${err}`);
     });
-
-    if (this.interruptionEnabled && this.interruptionDetector) {
-      this.interruptionTask = Task.from(({ signal }) =>
-        this.createInterruptionTask(this.interruptionDetector!, signal),
-      );
-      this.interruptionTask.result.catch((err) => {
-        this.logger.error(`Error running interruption task: ${err}`);
-      });
-    }
   }

   private async onSTTEvent(ev: SpeechEvent) {

@@ -608,11 +577,6 @@ export class AudioRecognition {
           this.sampleRate = ev.frames[0].sampleRate;
         }

-        // If agent is speaking, user speech is overlap - trigger interruption detection
-        if (this.agentSpeaking && this.interruptionEnabled) {
-          this.onStartOfOverlapSpeech(ev.speechDuration, this.userTurnSpan);
-        }
-
         this.bounceEOUTask?.cancel();
         break;
       case VADEventType.INFERENCE_DONE:

@@ -633,11 +597,6 @@ export class AudioRecognition {
         // when VAD fires END_OF_SPEECH, it already waited for the silence_duration
         this.speaking = false;

-        // If we were in overlap speech (agent speaking + user speaking), end it
-        if (this.agentSpeaking && this.interruptionEnabled) {
-          this.onEndOfOverlapSpeech();
-        }
-
         if (
           this.vadBaseTurnDetection ||
           (this.turnDetectionMode === 'stt' && this.userTurnCommitted)

@@ -655,123 +614,6 @@ export class AudioRecognition {
     }
   }

-  private async createInterruptionTask(
-    interruptionDetector: AdaptiveInterruptionDetector,
-    signal: AbortSignal,
-  ) {
-    // Create the interruption stream from the detector
-    this.interruptionStream = interruptionDetector.createStream();
-
-    // Forward audio frames to the interruption stream
-    const reader = this.interruptionInputStream.getReader();
-
-    const forwardTask = (async () => {
-      try {
-        while (!signal.aborted) {
-          const { done, value: frame } = await reader.read();
-          if (done) break;
-          await this.interruptionStream?.pushFrame(frame);
-        }
-      } catch (e) {
-        if (!signal.aborted) {
-          this.logger.error(e, 'Error forwarding audio to interruption stream');
-        }
-      } finally {
-        reader.releaseLock();
-      }
-    })();
-
-    // Read interruption events from the stream
-    const eventStream = this.interruptionStream.stream;
-    const eventReader = eventStream.getReader();
-
-    const abortHandler = () => {
-      eventReader.releaseLock();
-      this.interruptionStream?.close();
-      signal.removeEventListener('abort', abortHandler);
-    };
-    signal.addEventListener('abort', abortHandler);
-
-    try {
-      while (!signal.aborted) {
-        const { done, value: ev } = await eventReader.read();
-        if (done) break;
-
-        this.logger.debug({ type: ev.type, probability: ev.probability }, 'Interruption event');
-        this.hooks.onInterruption(ev);
-      }
-    } catch (e) {
-      if (!signal.aborted) {
-        this.logger.error(e, 'Error in interruption task');
-      }
-    } finally {
-      this.logger.debug('Interruption task closed');
-      await forwardTask;
-    }
-  }
-
-  /**
-   * Called when the agent starts speaking.
-   * Enables interruption detection by sending the agent-speech-started sentinel.
-   */
-  onStartOfAgentSpeech(): void {
-    this.agentSpeaking = true;
-
-    if (!this.interruptionEnabled || !this.interruptionStream) {
-      return;
-    }
-
-    this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechStarted());
-  }
-
-  /**
-   * Called when the agent stops speaking.
-   * Disables interruption detection by sending the agent-speech-ended sentinel.
-   */
-  onEndOfAgentSpeech(): void {
-    if (!this.interruptionEnabled || !this.interruptionStream) {
-      this.agentSpeaking = false;
-      return;
-    }
-
-    this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechEnded());
-
-    if (this.agentSpeaking) {
-      // No interruption was detected, end the overlap inference (idempotent)
-      this.onEndOfOverlapSpeech();
-    }
-
-    this.agentSpeaking = false;
-  }
-
-  /**
-   * Called when user starts speaking while agent is speaking (overlap speech).
-   * This triggers the interruption detection inference.
-   */
-  onStartOfOverlapSpeech(speechDuration: number, userSpeakingSpan?: Span): void {
-    if (!this.interruptionEnabled || !this.interruptionStream) {
-      return;
-    }
-
-    if (this.agentSpeaking && userSpeakingSpan) {
-      this.interruptionStream.pushFrame(
-        InterruptionStreamSentinel.overlapSpeechStarted(speechDuration, userSpeakingSpan),
-      );
-    }
-  }
-
-  /**
-   * Called when user stops speaking during overlap.
-   * This ends the interruption detection inference for this overlap period.
-   */
-  onEndOfOverlapSpeech(): void {
-    if (!this.interruptionEnabled || !this.interruptionStream) {
-      return;
-    }
-
-    this.interruptionStream.pushFrame(InterruptionStreamSentinel.overlapSpeechEnded());
-  }
-
   setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {
     this.deferredInputStream.setSource(audioStream);
   }

@@ -844,8 +686,6 @@ export class AudioRecognition {
     await this.sttTask?.cancelAndWait();
     await this.vadTask?.cancelAndWait();
     await this.bounceEOUTask?.cancelAndWait();
-    await this.interruptionTask?.cancelAndWait();
-    await this.interruptionStream?.close();
   }

   private _endUserTurnSpan({
package/src/voice/index.ts
CHANGED