npm - @livekit/agents - Versions diffs - 1.0.48 → 1.0.49 - Mend

@livekit/agents 1.0.48 → 1.0.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

package/dist/cpu.cjs +189 -0
package/dist/cpu.cjs.map +1 -0
package/dist/cpu.d.cts +24 -0
package/dist/cpu.d.ts +24 -0
package/dist/cpu.d.ts.map +1 -0
package/dist/cpu.js +152 -0
package/dist/cpu.js.map +1 -0
package/dist/cpu.test.cjs +227 -0
package/dist/cpu.test.cjs.map +1 -0
package/dist/cpu.test.js +204 -0
package/dist/cpu.test.js.map +1 -0
package/dist/inference/llm.cjs.map +1 -1
package/dist/inference/llm.d.cts +1 -1
package/dist/inference/llm.d.ts +1 -1
package/dist/inference/llm.d.ts.map +1 -1
package/dist/inference/llm.js.map +1 -1
package/dist/inference/tts.cjs.map +1 -1
package/dist/inference/tts.d.cts +6 -0
package/dist/inference/tts.d.ts +6 -0
package/dist/inference/tts.d.ts.map +1 -1
package/dist/inference/tts.js.map +1 -1
package/dist/version.cjs +1 -1
package/dist/version.js +1 -1
package/dist/voice/agent_activity.cjs +36 -8
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +37 -9
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +27 -1
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.cts +6 -0
package/dist/voice/agent_session.d.ts +6 -0
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +27 -1
package/dist/voice/agent_session.js.map +1 -1
package/dist/worker.cjs +6 -29
package/dist/worker.cjs.map +1 -1
package/dist/worker.d.ts.map +1 -1
package/dist/worker.js +6 -19
package/dist/worker.js.map +1 -1
package/package.json +1 -1
package/src/cpu.test.ts +239 -0
package/src/cpu.ts +173 -0
package/src/inference/llm.ts +2 -0
package/src/inference/tts.ts +8 -1
package/src/voice/agent_activity.ts +58 -10
package/src/voice/agent_session.ts +33 -2
package/src/worker.ts +34 -50

package/src/voice/agent_activity.ts CHANGED

@@ -7,7 +7,7 @@ import type { Span } from '@opentelemetry/api';
 import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
 import { Heap } from 'heap-js';
 import { AsyncLocalStorage } from 'node:async_hooks';
-import { ReadableStream } from 'node:stream/web';
+import { ReadableStream, TransformStream } from 'node:stream/web';
 import { type ChatContext, ChatMessage } from '../llm/chat_context.js';
 import {
   type ChatItem,
@@ -485,15 +485,36 @@ export class AgentActivity implements RecognitionHooks {
     void this.audioStream.close();
     this.audioStream = new MultiInputStream<AudioFrame>();
+    // Filter is applied on this.audioStream.stream (downstream of MultiInputStream) rather
+    // than on the source audioStream via pipeThrough. pipeThrough locks its source stream, so
+    // if it were applied directly on audioStream, that lock would survive MultiInputStream.close()
+    // and make audioStream permanently locked for subsequent attachAudioInput calls (e.g. handoff).
+    const aecWarmupAudioFilter = new TransformStream<AudioFrame, AudioFrame>({
+      transform: (frame, controller) => {
+        const shouldDiscardForAecWarmup =
+          this.agentSession.agentState === 'speaking' && this.agentSession._aecWarmupRemaining > 0;
+        if (!shouldDiscardForAecWarmup) {
+          controller.enqueue(frame);
+        }
+      },
+    });
     this.audioStreamId = this.audioStream.addInputStream(audioStream);
-    const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream.tee();
-    if (this.realtimeSession) {
+    if (this.realtimeSession && this.audioRecognition) {
+      const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream
+        .pipeThrough(aecWarmupAudioFilter)
+        .tee();
       this.realtimeSession.setInputAudioStream(realtimeAudioStream);
-    }
-    if (this.audioRecognition) {
       this.audioRecognition.setInputAudioStream(recognitionAudioStream);
+    } else if (this.realtimeSession) {
+      this.realtimeSession.setInputAudioStream(
+        this.audioStream.stream.pipeThrough(aecWarmupAudioFilter),
+      );
+    } else if (this.audioRecognition) {
+      this.audioRecognition.setInputAudioStream(
+        this.audioStream.stream.pipeThrough(aecWarmupAudioFilter),
+      );
     }
   }
@@ -755,6 +776,11 @@ export class AgentActivity implements RecognitionHooks {
   }
   private interruptByAudioActivity(): void {
+    if (this.agentSession._aecWarmupRemaining > 0) {
+      // Disable interruption from audio activity while AEC warmup is active.
+      return;
+    }
     if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
       // skip speech handle interruption if server side turn detection is enabled
       return;
@@ -1210,7 +1236,24 @@ export class AgentActivity implements RecognitionHooks {
     this.realtimeSession?.interrupt();
-    if (currentSpeech === undefined) {
+    if (force) {
+      // Force-interrupt (used during shutdown): cancel all speech tasks so they
+      // don't block on I/O that will never complete (e.g. audioOutput.waitForPlayout()
+      // when the room is disconnected). Mark the current speech as done immediately
+      // so the interrupt future resolves without waiting for tasks to finish.
+      // Clear the queue so mainTask doesn't dequeue already-interrupted handles
+      // and hang on _waitForGeneration() (the generation future created by
+      // _authorizeGeneration would never resolve since _markDone is a no-op
+      // once doneFut is already settled).
+      for (const task of this.speechTasks) {
+        task.cancel();
+      }
+      if (currentSpeech && !currentSpeech.done()) {
+        currentSpeech._markDone();
+      }
+      this.speechQueue.clear();
+      future.resolve();
+    } else if (currentSpeech === undefined) {
       future.resolve();
     } else {
       currentSpeech.addDoneCallback(() => {
@@ -1718,9 +1761,7 @@ export class AgentActivity implements RecognitionHooks {
       }
       replyAbortController.abort();
-      await Promise.allSettled(
-        tasks.map((task) => task.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT)),
-      );
+      await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
       let forwardedText = textOut?.text || '';
@@ -2549,6 +2590,13 @@ export class AgentActivity implements RecognitionHooks {
     const unlock = await this.lock.lock();
     try {
       this.cancelPreemptiveGeneration();
+      await cancelAndWait(Array.from(this.speechTasks), AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
+      if (this._currentSpeech && !this._currentSpeech.done()) {
+        this._currentSpeech._markDone();
+      }
       await this._closeSessionResources();
       if (this._mainTask) {

package/src/voice/agent_session.ts CHANGED

@@ -77,6 +77,7 @@ export interface VoiceOptions {
   maxToolSteps: number;
   preemptiveGeneration: boolean;
   userAwayTimeout?: number | null;
+  aecWarmupDuration: number | null;
   useTtsAlignedTranscript: boolean;
 }
@@ -90,6 +91,7 @@ const defaultVoiceOptions: VoiceOptions = {
   maxToolSteps: 3,
   preemptiveGeneration: false,
   userAwayTimeout: 15.0,
+  aecWarmupDuration: 3000,
   useTtsAlignedTranscript: true,
 } as const;
@@ -158,6 +160,8 @@ export class AgentSession<
   private closingTask: Promise<void> | null = null;
   private userAwayTimer: NodeJS.Timeout | null = null;
+  private _aecWarmupTimer: NodeJS.Timeout | null = null;
   // Connection options for STT, LLM, and TTS
   private _connOptions: ResolvedSessionConnectOptions;
@@ -169,6 +173,9 @@ export class AgentSession<
   private userSpeakingSpan?: Span;
   private agentSpeakingSpan?: Span;
+  /** @internal */
+  _aecWarmupRemaining = 0;
   /** @internal */
   _recorderIO?: RecorderIO;
@@ -241,6 +248,7 @@ export class AgentSession<
     // This is the "global" chat context, it holds the entire conversation history
     this._chatCtx = ChatContext.empty();
     this.options = { ...defaultVoiceOptions, ...voiceOptions };
+    this._aecWarmupRemaining = this.options.aecWarmupDuration ?? 0;
     this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
     this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
@@ -774,7 +782,9 @@ export class AgentSession<
     if (this.closingTask) {
       return;
     }
-    this.closeImpl(reason, error, drain);
+    this.closingTask = this.closeImpl(reason, error, drain).finally(() => {
+      this.closingTask = null;
+    });
   }
   /** @internal */
@@ -845,6 +855,14 @@ export class AgentSession<
       this.agentSpeakingSpan = undefined;
     }
+    if (state === 'speaking' && this._aecWarmupRemaining > 0 && this._aecWarmupTimer === null) {
+      this._aecWarmupTimer = setTimeout(() => this._onAecWarmupExpired(), this._aecWarmupRemaining);
+      this.logger.debug(
+        { warmupDurationMs: this._aecWarmupRemaining },
+        'aec warmup active, disabling interruptions',
+      );
+    }
     const oldState = this._agentState;
     this._agentState = state;
@@ -938,6 +956,19 @@ export class AgentSession<
     }
   }
+  /** @internal */
+  _onAecWarmupExpired(): void {
+    if (this._aecWarmupRemaining > 0) {
+      this.logger.debug('aec warmup expired, re-enabling interruptions');
+    }
+    this._aecWarmupRemaining = 0;
+    if (this._aecWarmupTimer !== null) {
+      clearTimeout(this._aecWarmupTimer);
+      this._aecWarmupTimer = null;
+    }
+  }
   private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
     if (this.userState === 'away' && ev.isFinal) {
       this.logger.debug('User returned from away state due to speech input');
@@ -969,6 +1000,7 @@ export class AgentSession<
     }
     this._cancelUserAwayTimer();
+    this._onAecWarmupExpired();
     this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
     if (this.activity) {
@@ -976,7 +1008,6 @@ export class AgentSession<
         try {
           await this.activity.interrupt({ force: true }).await;
         } catch (error) {
-          // Uninterruptible speech can throw during forced interruption.
           this.logger.warn({ error }, 'Error interrupting activity');
         }
       }

package/src/worker.ts CHANGED

@@ -13,8 +13,8 @@ import {
 import type { ParticipantInfo } from 'livekit-server-sdk';
 import { AccessToken, RoomServiceClient } from 'livekit-server-sdk';
 import { EventEmitter } from 'node:events';
-import os from 'node:os';
 import { WebSocket } from 'ws';
+import { getCpuMonitor } from './cpu.js';
 import { HTTPServer } from './http_server.js';
 import { InferenceRunner } from './inference_runner.js';
 import { InferenceProcExecutor } from './ipc/inference_proc_executor.js';
@@ -79,32 +79,11 @@ const defaultRequestFunc = async (ctx: JobRequest) => {
   await ctx.accept();
 };
-// eslint-disable-next-line @typescript-eslint/no-unused-vars
-const defaultCpuLoad = async (worker: AgentServer): Promise<number> => {
-  return new Promise((resolve) => {
-    const cpus1 = os.cpus();
-    setTimeout(() => {
-      const cpus2 = os.cpus();
-      let idle = 0;
-      let total = 0;
-      for (let i = 0; i < cpus1.length; i++) {
-        const cpu1 = cpus1[i]!.times;
-        const cpu2 = cpus2[i]!.times;
-        idle += cpu2.idle - cpu1.idle;
-        const total1 = Object.values(cpu1).reduce((acc, i) => acc + i, 0);
-        const total2 = Object.values(cpu2).reduce((acc, i) => acc + i, 0);
+const cpuMonitor = getCpuMonitor();
-        total += total2 - total1;
-      }
-      resolve(+(1 - idle / total).toFixed(2));
-    }, UPDATE_LOAD_INTERVAL);
-  });
+// eslint-disable-next-line @typescript-eslint/no-unused-vars
+const defaultCpuLoad = async (_worker: AgentServer): Promise<number> => {
+  return cpuMonitor.cpuPercent(UPDATE_LOAD_INTERVAL);
 };
 /** Participant permissions to pass to every agent spun up by this worker. */
@@ -651,33 +630,38 @@ export class AgentServer {
       if (closingWS) clearInterval(loadMonitor);
       const oldStatus = currentStatus;
-      this.#opts.loadFunc(this).then((currentLoad: number) => {
-        const isFull = currentLoad >= this.#opts.loadThreshold;
-        const currentlyAvailable = !isFull;
-        currentStatus = currentlyAvailable ? WorkerStatus.WS_AVAILABLE : WorkerStatus.WS_FULL;
-        if (oldStatus != currentStatus) {
-          const extra = { load: currentLoad, loadThreshold: this.#opts.loadThreshold };
-          if (isFull) {
-            this.#logger.child(extra).info('worker is at full capacity, marking as unavailable');
-          } else {
-            this.#logger.child(extra).info('worker is below capacity, marking as available');
+      this.#opts
+        .loadFunc(this)
+        .then((currentLoad: number) => {
+          const isFull = currentLoad >= this.#opts.loadThreshold;
+          const currentlyAvailable = !isFull;
+          currentStatus = currentlyAvailable ? WorkerStatus.WS_AVAILABLE : WorkerStatus.WS_FULL;
+          if (oldStatus != currentStatus) {
+            const extra = { load: currentLoad, loadThreshold: this.#opts.loadThreshold };
+            if (isFull) {
+              this.#logger.child(extra).info('worker is at full capacity, marking as unavailable');
+            } else {
+              this.#logger.child(extra).info('worker is below capacity, marking as available');
+            }
           }
-        }
-        this.event.emit(
-          'worker_msg',
-          new WorkerMessage({
-            message: {
-              case: 'updateWorker',
-              value: {
-                load: currentLoad,
-                status: currentStatus,
+          this.event.emit(
+            'worker_msg',
+            new WorkerMessage({
+              message: {
+                case: 'updateWorker',
+                value: {
+                  load: currentLoad,
+                  status: currentStatus,
+                },
               },
-            },
-          }),
-        );
-      });
+            }),
+          );
+        })
+        .catch((e) => {
+          this.#logger.warn({ error: e }, 'failed to measure CPU load');
+        });
     }, UPDATE_LOAD_INTERVAL);
     await close;