npm - @livekit/agents - Versions diffs - 0.5.2 → 0.6.0 - Mend

@livekit/agents 0.5.2 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (125)

package/dist/index.cjs +3 -0
package/dist/index.cjs.map +1 -1
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +2 -0
package/dist/index.js.map +1 -1
package/dist/llm/index.cjs +2 -0
package/dist/llm/index.cjs.map +1 -1
package/dist/llm/index.d.ts +1 -1
package/dist/llm/index.d.ts.map +1 -1
package/dist/llm/index.js +2 -0
package/dist/llm/index.js.map +1 -1
package/dist/llm/llm.cjs +47 -3
package/dist/llm/llm.cjs.map +1 -1
package/dist/llm/llm.d.ts +15 -2
package/dist/llm/llm.d.ts.map +1 -1
package/dist/llm/llm.js +46 -3
package/dist/llm/llm.js.map +1 -1
package/dist/metrics/base.cjs +44 -0
package/dist/metrics/base.cjs.map +1 -0
package/dist/metrics/base.d.ts +96 -0
package/dist/metrics/base.d.ts.map +1 -0
package/dist/metrics/base.js +20 -0
package/dist/metrics/base.js.map +1 -0
package/dist/metrics/index.cjs +35 -0
package/dist/metrics/index.cjs.map +1 -0
package/dist/metrics/index.d.ts +5 -0
package/dist/metrics/index.d.ts.map +1 -0
package/dist/metrics/index.js +9 -0
package/dist/metrics/index.js.map +1 -0
package/dist/metrics/usage_collector.cjs +53 -0
package/dist/metrics/usage_collector.cjs.map +1 -0
package/dist/metrics/usage_collector.d.ts +14 -0
package/dist/metrics/usage_collector.d.ts.map +1 -0
package/dist/metrics/usage_collector.js +29 -0
package/dist/metrics/usage_collector.js.map +1 -0
package/dist/metrics/utils.cjs +104 -0
package/dist/metrics/utils.cjs.map +1 -0
package/dist/metrics/utils.d.ts +10 -0
package/dist/metrics/utils.d.ts.map +1 -0
package/dist/metrics/utils.js +73 -0
package/dist/metrics/utils.js.map +1 -0
package/dist/multimodal/multimodal_agent.cjs +7 -13
package/dist/multimodal/multimodal_agent.cjs.map +1 -1
package/dist/multimodal/multimodal_agent.d.ts +1 -4
package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
package/dist/multimodal/multimodal_agent.js +7 -13
package/dist/multimodal/multimodal_agent.js.map +1 -1
package/dist/pipeline/index.cjs +2 -0
package/dist/pipeline/index.cjs.map +1 -1
package/dist/pipeline/index.d.ts +1 -1
package/dist/pipeline/index.d.ts.map +1 -1
package/dist/pipeline/index.js +3 -1
package/dist/pipeline/index.js.map +1 -1
package/dist/pipeline/pipeline_agent.cjs +166 -66
package/dist/pipeline/pipeline_agent.cjs.map +1 -1
package/dist/pipeline/pipeline_agent.d.ts +10 -4
package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
package/dist/pipeline/pipeline_agent.js +169 -69
package/dist/pipeline/pipeline_agent.js.map +1 -1
package/dist/pipeline/speech_handle.cjs +49 -1
package/dist/pipeline/speech_handle.cjs.map +1 -1
package/dist/pipeline/speech_handle.d.ts +12 -2
package/dist/pipeline/speech_handle.d.ts.map +1 -1
package/dist/pipeline/speech_handle.js +50 -2
package/dist/pipeline/speech_handle.js.map +1 -1
package/dist/stt/index.cjs.map +1 -1
package/dist/stt/index.d.ts +1 -1
package/dist/stt/index.d.ts.map +1 -1
package/dist/stt/index.js.map +1 -1
package/dist/stt/stream_adapter.cjs +15 -5
package/dist/stt/stream_adapter.cjs.map +1 -1
package/dist/stt/stream_adapter.d.ts +4 -1
package/dist/stt/stream_adapter.d.ts.map +1 -1
package/dist/stt/stream_adapter.js +15 -5
package/dist/stt/stream_adapter.js.map +1 -1
package/dist/stt/stt.cjs +46 -2
package/dist/stt/stt.cjs.map +1 -1
package/dist/stt/stt.d.ts +25 -3
package/dist/stt/stt.d.ts.map +1 -1
package/dist/stt/stt.js +46 -2
package/dist/stt/stt.js.map +1 -1
package/dist/tts/index.cjs +4 -2
package/dist/tts/index.cjs.map +1 -1
package/dist/tts/index.d.ts +1 -1
package/dist/tts/index.d.ts.map +1 -1
package/dist/tts/index.js +3 -1
package/dist/tts/index.js.map +1 -1
package/dist/tts/stream_adapter.cjs +14 -3
package/dist/tts/stream_adapter.cjs.map +1 -1
package/dist/tts/stream_adapter.d.ts +3 -0
package/dist/tts/stream_adapter.d.ts.map +1 -1
package/dist/tts/stream_adapter.js +15 -4
package/dist/tts/stream_adapter.js.map +1 -1
package/dist/tts/tts.cjs +109 -6
package/dist/tts/tts.cjs.map +1 -1
package/dist/tts/tts.d.ts +24 -1
package/dist/tts/tts.d.ts.map +1 -1
package/dist/tts/tts.js +107 -5
package/dist/tts/tts.js.map +1 -1
package/dist/vad.cjs +43 -2
package/dist/vad.cjs.map +1 -1
package/dist/vad.d.ts +21 -4
package/dist/vad.d.ts.map +1 -1
package/dist/vad.js +43 -2
package/dist/vad.js.map +1 -1
package/package.json +1 -1
package/src/index.ts +2 -1
package/src/llm/index.ts +2 -0
package/src/llm/llm.ts +55 -3
package/src/metrics/base.ts +127 -0
package/src/metrics/index.ts +20 -0
package/src/metrics/usage_collector.ts +40 -0
package/src/metrics/utils.ts +100 -0
package/src/multimodal/multimodal_agent.ts +12 -17
package/src/pipeline/index.ts +1 -1
package/src/pipeline/pipeline_agent.ts +206 -87
package/src/pipeline/speech_handle.ts +67 -2
package/src/stt/index.ts +2 -0
package/src/stt/stream_adapter.ts +17 -5
package/src/stt/stt.ts +67 -3
package/src/tts/index.ts +2 -0
package/src/tts/stream_adapter.ts +17 -4
package/src/tts/tts.ts +127 -4
package/src/vad.ts +61 -4

package/src/stt/stt.ts CHANGED Viewed

@@ -2,6 +2,9 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
+import { EventEmitter } from 'node:events';
+import type { STTMetrics } from '../metrics/base.js';
 import type { AudioBuffer } from '../utils.js';
 import { AsyncIterableQueue } from '../utils.js';
@@ -27,6 +30,9 @@ export enum SpeechEventType {
    * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.
    */
   END_OF_SPEECH = 3,
+  /** Usage event, emitted periodically to indicate usage metrics. */
+  RECOGNITION_USAGE = 4,
+  METRICS_COLLECTED = 5,
 }
 /** SpeechData contains metadata about this {@link SpeechEvent}. */
@@ -38,10 +44,16 @@ export interface SpeechData {
   confidence: number;
 }
+export interface RecognitionUsage {
+  audioDuration: number;
+}
 /** SpeechEvent is a packet of speech-to-text data. */
 export interface SpeechEvent {
   type: SpeechEventType;
   alternatives?: [SpeechData, ...SpeechData[]];
+  requestId?: string;
+  recognitionUsage?: RecognitionUsage;
 }
 /**
@@ -55,6 +67,10 @@ export interface STTCapabilities {
   interimResults: boolean;
 }
+export type STTCallbacks = {
+  [SpeechEventType.METRICS_COLLECTED]: (metrics: STTMetrics) => void;
+};
 /**
  * An instance of a speech-to-text adapter.
  *
@@ -62,10 +78,12 @@ export interface STTCapabilities {
  * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
  * exports its own child STT class, which inherits this class's methods.
  */
-export abstract class STT {
+export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {
+  abstract label: string;
   #capabilities: STTCapabilities;
   constructor(capabilities: STTCapabilities) {
+    super();
     this.#capabilities = capabilities;
   }
@@ -75,7 +93,24 @@ export abstract class STT {
   }
   /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
-  abstract recognize(frame: AudioBuffer): Promise<SpeechEvent>;
+  async recognize(frame: AudioBuffer): Promise<SpeechEvent> {
+    const startTime = process.hrtime.bigint();
+    const event = await this._recognize(frame);
+    const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
+    this.emit(SpeechEventType.METRICS_COLLECTED, {
+      requestId: event.requestId ?? '',
+      timestamp: Date.now(),
+      duration,
+      label: this.label,
+      audioDuration: Array.isArray(frame)
+        ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)
+        : frame.samplesPerChannel / frame.sampleRate,
+      streamed: false,
+    });
+    return event;
+  }
+  protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;
   /**
    * Returns a {@link SpeechStream} that can be used to push audio frames and receive
@@ -103,8 +138,36 @@ export abstract class STT {
 export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {
   protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
   protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
+  protected output = new AsyncIterableQueue<SpeechEvent>();
   protected queue = new AsyncIterableQueue<SpeechEvent>();
+  abstract label: string;
   protected closed = false;
+  #stt: STT;
+  constructor(stt: STT) {
+    this.#stt = stt;
+    this.monitorMetrics();
+  }
+  protected async monitorMetrics() {
+    const startTime = process.hrtime.bigint();
+    for await (const event of this.queue) {
+      this.output.put(event);
+      if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;
+      const duration = process.hrtime.bigint() - startTime;
+      const metrics: STTMetrics = {
+        timestamp: Date.now(),
+        requestId: event.requestId!,
+        duration: Math.trunc(Number(duration / BigInt(1000000))),
+        label: this.label,
+        audioDuration: event.recognitionUsage!.audioDuration,
+        streamed: true,
+      };
+      this.#stt.emit(SpeechEventType.METRICS_COLLECTED, metrics);
+    }
+    this.output.close();
+  }
   /** Push an audio frame to the STT */
   pushFrame(frame: AudioFrame) {
@@ -140,13 +203,14 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
   }
   next(): Promise<IteratorResult<SpeechEvent>> {
-    return this.queue.next();
+    return this.output.next();
   }
   /** Close both the input and output of the STT stream */
   close() {
     this.input.close();
     this.queue.close();
+    this.output.close();
     this.closed = true;
   }

package/src/tts/index.ts CHANGED Viewed

@@ -4,7 +4,9 @@
 export {
   type SynthesizedAudio,
   type TTSCapabilities,
+  type TTSCallbacks,
   TTS,
+  TTSEvent,
   SynthesizeStream,
   ChunkedStream,
 } from './tts.js';

package/src/tts/stream_adapter.ts CHANGED Viewed

@@ -3,16 +3,23 @@
 // SPDX-License-Identifier: Apache-2.0
 import type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';
 import type { ChunkedStream } from './tts.js';
-import { SynthesizeStream, TTS } from './tts.js';
+import { SynthesizeStream, TTS, TTSEvent } from './tts.js';
 export class StreamAdapter extends TTS {
   #tts: TTS;
   #sentenceTokenizer: SentenceTokenizer;
+  label: string;
   constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {
     super(tts.sampleRate, tts.numChannels, { streaming: true });
     this.#tts = tts;
     this.#sentenceTokenizer = sentenceTokenizer;
+    this.label = this.#tts.label;
+    this.label = `tts.StreamAdapter<${this.#tts.label}>`;
+    this.#tts.on(TTSEvent.METRICS_COLLECTED, (metrics) => {
+      this.emit(TTSEvent.METRICS_COLLECTED, metrics);
+    });
   }
   synthesize(text: string): ChunkedStream {
@@ -27,15 +34,21 @@ export class StreamAdapter extends TTS {
 export class StreamAdapterWrapper extends SynthesizeStream {
   #tts: TTS;
   #sentenceStream: SentenceStream;
+  label: string;
   constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {
-    super();
+    super(tts);
     this.#tts = tts;
     this.#sentenceStream = sentenceTokenizer.stream();
+    this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;
     this.#run();
   }
+  async monitorMetrics() {
+    return; // do nothing
+  }
   async #run() {
     const forwardInput = async () => {
       for await (const input of this.input) {
@@ -52,10 +65,10 @@ export class StreamAdapterWrapper extends SynthesizeStream {
     const synthesize = async () => {
       for await (const ev of this.#sentenceStream) {
         for await (const audio of this.#tts.synthesize(ev.token)) {
-          this.queue.put(audio);
+          this.output.put(audio);
         }
       }
-      this.queue.put(SynthesizeStream.END_OF_STREAM);
+      this.output.put(SynthesizeStream.END_OF_STREAM);
     };
     Promise.all([forwardInput(), synthesize()]);

package/src/tts/tts.ts CHANGED Viewed

@@ -2,6 +2,9 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
+import { EventEmitter } from 'node:events';
+import type { TTSMetrics } from '../metrics/base.js';
 import { AsyncIterableQueue, mergeFrames } from '../utils.js';
 /** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */
@@ -14,6 +17,8 @@ export interface SynthesizedAudio {
   frame: AudioFrame;
   /** Current segment of the synthesized audio */
   deltaText?: string;
+  /** Whether this is the last frame of the segment (streaming only) */
+  final: boolean;
 }
 /**
@@ -27,6 +32,14 @@ export interface TTSCapabilities {
   streaming: boolean;
 }
+export enum TTSEvent {
+  METRICS_COLLECTED,
+}
+export type TTSCallbacks = {
+  [TTSEvent.METRICS_COLLECTED]: (metrics: TTSMetrics) => void;
+};
 /**
  * An instance of a text-to-speech adapter.
  *
@@ -34,12 +47,14 @@ export interface TTSCapabilities {
  * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that
  * exports its own child TTS class, which inherits this class's methods.
  */
-export abstract class TTS {
+export abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {
   #capabilities: TTSCapabilities;
   #sampleRate: number;
   #numChannels: number;
+  abstract label: string;
   constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {
+    super();
     this.#capabilities = capabilities;
     this.#sampleRate = sampleRate;
     this.#numChannels = numChannels;
@@ -94,10 +109,71 @@ export abstract class SynthesizeStream
   protected queue = new AsyncIterableQueue<
     SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM
   >();
+  protected output = new AsyncIterableQueue<
+    SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM
+  >();
   protected closed = false;
+  abstract label: string;
+  #tts: TTS;
+  #metricsPendingTexts: string[] = [];
+  #metricsText = '';
+  #monitorMetricsTask?: Promise<void>;
+  constructor(tts: TTS) {
+    this.#tts = tts;
+  }
+  protected async monitorMetrics() {
+    const startTime = process.hrtime.bigint();
+    let audioDuration = 0;
+    let ttfb: bigint | undefined;
+    let requestId = '';
+    const emit = () => {
+      if (this.#metricsPendingTexts.length) {
+        const text = this.#metricsPendingTexts.shift()!;
+        const duration = process.hrtime.bigint() - startTime;
+        const metrics: TTSMetrics = {
+          timestamp: Date.now(),
+          requestId,
+          ttfb: Math.trunc(Number(ttfb! / BigInt(1000000))),
+          duration: Math.trunc(Number(duration / BigInt(1000000))),
+          charactersCount: text.length,
+          audioDuration,
+          cancelled: false, // XXX(nbsp)
+          label: this.label,
+          streamed: false,
+        };
+        this.#tts.emit(TTSEvent.METRICS_COLLECTED, metrics);
+      }
+    };
+    for await (const audio of this.queue) {
+      this.output.put(audio);
+      if (audio === SynthesizeStream.END_OF_STREAM) continue;
+      requestId = audio.requestId;
+      if (!ttfb) {
+        ttfb = process.hrtime.bigint() - startTime;
+      }
+      audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
+      if (audio.final) {
+        emit();
+      }
+    }
+    if (requestId) {
+      emit();
+    }
+    this.output.close();
+  }
   /** Push a string of text to the TTS */
   pushText(text: string) {
+    if (!this.#monitorMetricsTask) {
+      this.#monitorMetricsTask = this.monitorMetrics();
+    }
+    this.#metricsText += text;
     if (this.input.closed) {
       throw new Error('Input is closed');
     }
@@ -109,6 +185,10 @@ export abstract class SynthesizeStream
   /** Flush the TTS, causing it to process all pending text */
   flush() {
+    if (this.#metricsText) {
+      this.#metricsPendingTexts.push(this.#metricsText);
+      this.#metricsText = '';
+    }
     if (this.input.closed) {
       throw new Error('Input is closed');
     }
@@ -130,13 +210,13 @@ export abstract class SynthesizeStream
   }
   next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {
-    return this.queue.next();
+    return this.output.next();
   }
   /** Close both the input and output of the TTS stream */
   close() {
     this.input.close();
-    this.queue.close();
+    this.output.close();
     this.closed = true;
   }
@@ -161,7 +241,49 @@ export abstract class SynthesizeStream
  */
 export abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {
   protected queue = new AsyncIterableQueue<SynthesizedAudio>();
+  protected output = new AsyncIterableQueue<SynthesizedAudio>();
   protected closed = false;
+  abstract label: string;
+  #text: string;
+  #tts: TTS;
+  constructor(text: string, tts: TTS) {
+    this.#text = text;
+    this.#tts = tts;
+    this.monitorMetrics();
+  }
+  protected async monitorMetrics() {
+    const startTime = process.hrtime.bigint();
+    let audioDuration = 0;
+    let ttfb: bigint | undefined;
+    let requestId = '';
+    for await (const audio of this.queue) {
+      this.output.put(audio);
+      requestId = audio.requestId;
+      if (!ttfb) {
+        ttfb = process.hrtime.bigint() - startTime;
+      }
+      audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
+    }
+    this.output.close();
+    const duration = process.hrtime.bigint() - startTime;
+    const metrics: TTSMetrics = {
+      timestamp: Date.now(),
+      requestId,
+      ttfb: Math.trunc(Number(ttfb! / BigInt(1000000))),
+      duration: Math.trunc(Number(duration / BigInt(1000000))),
+      charactersCount: this.#text.length,
+      audioDuration,
+      cancelled: false, // XXX(nbsp)
+      label: this.label,
+      streamed: false,
+    };
+    this.#tts.emit(TTSEvent.METRICS_COLLECTED, metrics);
+  }
   /** Collect every frame into one in a single call */
   async collect(): Promise<AudioFrame> {
@@ -173,12 +295,13 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
   }
   next(): Promise<IteratorResult<SynthesizedAudio>> {
-    return this.queue.next();
+    return this.output.next();
   }
   /** Close both the input and output of the TTS stream */
   close() {
     this.queue.close();
+    this.output.close();
     this.closed = true;
   }

package/src/vad.ts CHANGED Viewed

@@ -2,12 +2,16 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 import type { AudioFrame } from '@livekit/rtc-node';
+import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
+import { EventEmitter } from 'node:events';
+import type { VADMetrics } from './metrics/base.js';
 import { AsyncIterableQueue } from './utils.js';
 export enum VADEventType {
   START_OF_SPEECH,
   INFERENCE_DONE,
   END_OF_SPEECH,
+  METRICS_COLLECTED,
 }
 export interface VADEvent {
@@ -19,9 +23,9 @@ export interface VADEvent {
   samplesIndex: number;
   /** Timestamp when the event was fired. */
   timestamp: number;
-  /** Duration of the detected speech segment in seconds. */
+  /** Duration of the speech segment. */
   speechDuration: number;
-  /** Duration of the silence segment preceding or following the speech, in seconds. */
+  /** Duration of the silence segment. */
   silenceDuration: number;
   /**
    * List of audio frames associated with the speech.
@@ -38,15 +42,26 @@ export interface VADEvent {
   inferenceDuration: number;
   /** Indicates whether speech was detected in the frames. */
   speaking: boolean;
+  /** Threshold used to detect silence. */
+  rawAccumulatedSilence: number;
+  /** Threshold used to detect speech. */
+  rawAccumulatedSpeech: number;
 }
 export interface VADCapabilities {
   updateInterval: number;
 }
-export abstract class VAD {
+export type VADCallbacks = {
+  [VADEventType.METRICS_COLLECTED]: (metrics: VADMetrics) => void;
+};
+export abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {
   #capabilities: VADCapabilities;
+  abstract label: string;
   constructor(capabilities: VADCapabilities) {
+    super();
     this.#capabilities = capabilities;
   }
@@ -64,7 +79,48 @@ export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
   protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
   protected input = new AsyncIterableQueue<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();
   protected queue = new AsyncIterableQueue<VADEvent>();
+  protected output = new AsyncIterableQueue<VADEvent>();
   protected closed = false;
+  #vad: VAD;
+  #lastActivityTime = BigInt(0);
+  constructor(vad: VAD) {
+    this.#vad = vad;
+    this.monitorMetrics();
+  }
+  protected async monitorMetrics() {
+    let inferenceDurationTotal = 0;
+    let inferenceCount = 0;
+    for await (const event of this.queue) {
+      this.output.put(event);
+      switch (event.type) {
+        case VADEventType.START_OF_SPEECH:
+          inferenceCount++;
+          if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {
+            this.#vad.emit(VADEventType.METRICS_COLLECTED, {
+              timestamp: Date.now(),
+              idleTime: Math.trunc(
+                Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),
+              ),
+              inferenceDurationTotal,
+              inferenceCount,
+              label: this.#vad.label,
+            });
+            inferenceCount = 0;
+            inferenceDurationTotal = 0;
+          }
+          break;
+        case VADEventType.INFERENCE_DONE:
+        case VADEventType.END_OF_SPEECH:
+          this.#lastActivityTime = process.hrtime.bigint();
+          break;
+      }
+    }
+    this.output.close();
+  }
   pushFrame(frame: AudioFrame) {
     if (this.input.closed) {
@@ -97,12 +153,13 @@ export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
   }
   next(): Promise<IteratorResult<VADEvent>> {
-    return this.queue.next();
+    return this.output.next();
   }
   close() {
     this.input.close();
     this.queue.close();
+    this.output.close();
     this.closed = true;
   }