npm - @livekit/agents - Versions diffs - 1.0.25 → 1.0.30 - Mend

@livekit/agents 1.0.25 → 1.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

package/dist/connection_pool.cjs +242 -0
package/dist/connection_pool.cjs.map +1 -0
package/dist/connection_pool.d.cts +123 -0
package/dist/connection_pool.d.ts +123 -0
package/dist/connection_pool.d.ts.map +1 -0
package/dist/connection_pool.js +218 -0
package/dist/connection_pool.js.map +1 -0
package/dist/connection_pool.test.cjs +256 -0
package/dist/connection_pool.test.cjs.map +1 -0
package/dist/connection_pool.test.js +255 -0
package/dist/connection_pool.test.js.map +1 -0
package/dist/index.cjs +2 -0
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +1 -0
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js +1 -0
package/dist/index.js.map +1 -1
package/dist/inference/tts.cjs +172 -58
package/dist/inference/tts.cjs.map +1 -1
package/dist/inference/tts.d.cts +3 -1
package/dist/inference/tts.d.ts +3 -1
package/dist/inference/tts.d.ts.map +1 -1
package/dist/inference/tts.js +173 -59
package/dist/inference/tts.js.map +1 -1
package/dist/tts/stream_adapter.cjs +6 -3
package/dist/tts/stream_adapter.cjs.map +1 -1
package/dist/tts/stream_adapter.d.cts +1 -1
package/dist/tts/stream_adapter.d.ts +1 -1
package/dist/tts/stream_adapter.d.ts.map +1 -1
package/dist/tts/stream_adapter.js +6 -3
package/dist/tts/stream_adapter.js.map +1 -1
package/dist/tts/tts.cjs +26 -15
package/dist/tts/tts.cjs.map +1 -1
package/dist/tts/tts.d.cts +7 -4
package/dist/tts/tts.d.ts +7 -4
package/dist/tts/tts.d.ts.map +1 -1
package/dist/tts/tts.js +26 -15
package/dist/tts/tts.js.map +1 -1
package/dist/utils.cjs +20 -0
package/dist/utils.cjs.map +1 -1
package/dist/utils.d.cts +7 -0
package/dist/utils.d.ts +7 -0
package/dist/utils.d.ts.map +1 -1
package/dist/utils.js +19 -0
package/dist/utils.js.map +1 -1
package/dist/voice/agent_activity.cjs +3 -1
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +3 -1
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +6 -1
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +6 -1
package/dist/voice/agent_session.js.map +1 -1
package/dist/voice/avatar/datastream_io.cjs +1 -1
package/dist/voice/avatar/datastream_io.cjs.map +1 -1
package/dist/voice/avatar/datastream_io.js +1 -1
package/dist/voice/avatar/datastream_io.js.map +1 -1
package/dist/voice/background_audio.cjs +77 -37
package/dist/voice/background_audio.cjs.map +1 -1
package/dist/voice/background_audio.d.cts +10 -3
package/dist/voice/background_audio.d.ts +10 -3
package/dist/voice/background_audio.d.ts.map +1 -1
package/dist/voice/background_audio.js +78 -37
package/dist/voice/background_audio.js.map +1 -1
package/dist/voice/index.cjs +1 -0
package/dist/voice/index.cjs.map +1 -1
package/dist/voice/index.d.cts +1 -0
package/dist/voice/index.d.ts +1 -0
package/dist/voice/index.d.ts.map +1 -1
package/dist/voice/index.js +1 -0
package/dist/voice/index.js.map +1 -1
package/dist/voice/io.cjs +10 -1
package/dist/voice/io.cjs.map +1 -1
package/dist/voice/io.d.cts +18 -1
package/dist/voice/io.d.ts +18 -1
package/dist/voice/io.d.ts.map +1 -1
package/dist/voice/io.js +10 -1
package/dist/voice/io.js.map +1 -1
package/dist/voice/recorder_io/recorder_io.cjs +1 -1
package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
package/dist/voice/recorder_io/recorder_io.js +1 -1
package/dist/voice/recorder_io/recorder_io.js.map +1 -1
package/dist/voice/room_io/_output.cjs +1 -1
package/dist/voice/room_io/_output.cjs.map +1 -1
package/dist/voice/room_io/_output.js +1 -1
package/dist/voice/room_io/_output.js.map +1 -1
package/dist/voice/transcription/synchronizer.cjs +1 -1
package/dist/voice/transcription/synchronizer.cjs.map +1 -1
package/dist/voice/transcription/synchronizer.js +1 -1
package/dist/voice/transcription/synchronizer.js.map +1 -1
package/dist/worker.cjs +4 -6
package/dist/worker.cjs.map +1 -1
package/dist/worker.d.ts.map +1 -1
package/dist/worker.js +4 -6
package/dist/worker.js.map +1 -1
package/package.json +3 -3
package/src/connection_pool.test.ts +346 -0
package/src/connection_pool.ts +307 -0
package/src/index.ts +1 -0
package/src/inference/tts.ts +206 -65
package/src/tts/stream_adapter.ts +10 -3
package/src/tts/tts.ts +41 -18
package/src/utils.ts +25 -0
package/src/voice/agent_activity.ts +7 -1
package/src/voice/agent_session.ts +6 -1
package/src/voice/avatar/datastream_io.ts +1 -1
package/src/voice/background_audio.ts +95 -55
package/src/voice/index.ts +1 -0
package/src/voice/io.ts +24 -0
package/src/voice/recorder_io/recorder_io.ts +1 -1
package/src/voice/room_io/_output.ts +1 -1
package/src/voice/transcription/synchronizer.ts +1 -1
package/src/worker.ts +4 -7

package/src/voice/background_audio.ts CHANGED Viewed

@@ -3,6 +3,7 @@
 // SPDX-License-Identifier: Apache-2.0
 import {
   AudioFrame,
+  AudioMixer,
   AudioSource,
   LocalAudioTrack,
   type LocalTrackPublication,
@@ -57,7 +58,7 @@ export interface BackgroundAudioPlayerOptions {
   /**
    * Sound to play when the agent is thinking.
-   * TODO (Brian): Implement thinking sound when AudioMixer becomes available
+   * Plays when agent state changes to 'thinking' and stops when it changes to other states.
    */
   thinkingSound?: AudioSourceType | AudioConfig | AudioConfig[];
@@ -113,15 +114,16 @@ export class PlayHandle {
  * This class handles playing ambient sounds and manages audio track publishing.
  * It supports:
  * - Continuous ambient sound playback with looping
+ * - Thinking sound playback during agent processing
+ * - Multiple simultaneous audio streams via AudioMixer
  * - Volume control and probability-based sound selection
  * - Integration with LiveKit rooms and agent sessions
  *
- * Note: Thinking sound not yet supported
- *
  * @example
  * ```typescript
  * const player = new BackgroundAudioPlayer({
  *   ambientSound: { source: BuiltinAudioClip.OFFICE_AMBIENCE, volume: 0.8 },
+ *   thinkingSound: { source: BuiltinAudioClip.KEYBOARD_TYPING, volume: 0.6 },
  * });
  *
  * await player.start({ room, agentSession });
@@ -130,9 +132,12 @@ export class PlayHandle {
 export class BackgroundAudioPlayer {
   private ambientSound?: AudioSourceType | AudioConfig | AudioConfig[];
   private thinkingSound?: AudioSourceType | AudioConfig | AudioConfig[];
+  private streamTimeoutMs: number;
   private playTasks: Task<void>[] = [];
   private audioSource = new AudioSource(48000, 1, AUDIO_SOURCE_BUFFER_MS);
+  private audioMixer: AudioMixer;
+  private mixerTask?: Task<void>;
   private room?: Room;
   private agentSession?: AgentSession;
@@ -143,20 +148,24 @@ export class BackgroundAudioPlayer {
   private ambientHandle?: PlayHandle;
   private thinkingHandle?: PlayHandle;
+  private closed = true;
   // TODO (Brian): add lock
   #logger = log();
   constructor(options?: BackgroundAudioPlayerOptions) {
-    const { ambientSound, thinkingSound } = options || {};
+    const { ambientSound, thinkingSound, streamTimeoutMs = 200 } = options || {};
     this.ambientSound = ambientSound;
     this.thinkingSound = thinkingSound;
+    this.streamTimeoutMs = streamTimeoutMs;
-    if (this.thinkingSound) {
-      this.#logger.warn('thinkingSound is not yet supported');
-      // TODO: Implement thinking sound when AudioMixer becomes available
-    }
+    this.audioMixer = new AudioMixer(48000, 1, {
+      blocksize: 4800, // 100ms at 48kHz
+      capacity: 1,
+      streamTimeoutMs: this.streamTimeoutMs,
+    });
   }
   /**
@@ -278,15 +287,24 @@ export class BackgroundAudioPlayer {
     this.agentSession = agentSession;
     this.trackPublishOptions = trackPublishOptions;
+    this.closed = false;
     await this.publishTrack();
     // TODO (Brian): check job context is not fake
-    // TODO (Brian): start audio mixer task
+    this.mixerTask = Task.from(async () => {
+      try {
+        await this.runMixerTask();
+      } catch (err) {
+        if (this.closed) return; // expected when AudioSource is closed
+        throw err;
+      }
+    });
     this.room.on('reconnected', this.onReconnected);
     this.agentSession?.on(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);
     if (!this.ambientSound) return;
     const normalized = this.normalizeSoundSource(this.ambientSound);
@@ -301,16 +319,21 @@ export class BackgroundAudioPlayer {
    * Close and cleanup the background audio system
    */
   async close(): Promise<void> {
+    this.closed = true;
     await cancelAndWait(this.playTasks, TASK_TIMEOUT_MS);
     if (this.republishTask) {
       await this.republishTask.cancelAndWait(TASK_TIMEOUT_MS);
     }
-    // TODO (Brian): cancel audio mixer task and close audio mixer
+    await this.audioMixer.aclose();
     await this.audioSource.close();
+    if (this.mixerTask) {
+      await this.mixerTask.cancelAndWait(TASK_TIMEOUT_MS);
+    }
     this.agentSession?.off(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);
     this.room?.off('reconnected', this.onReconnected);
@@ -362,6 +385,12 @@ export class BackgroundAudioPlayer {
     await this.publishTrack();
   }
+  private async runMixerTask(): Promise<void> { // Pumps every frame yielded by audioMixer into audioSource; returns when the mixer's async iteration ends.
+    for await (const frame of this.audioMixer) {
+      await this.audioSource.captureFrame(frame); // awaited, so the next mixed frame is not pulled until this one has been captured
+    }
+  }
   private onAgentStateChanged = (ev: AgentStateChangedEvent): void => {
     if (!this.thinkingSound) {
       return;
@@ -372,12 +401,45 @@ export class BackgroundAudioPlayer {
         return;
       }
-      // TODO (Brian): play thinking sound and assign to thinkingHandle
+      const normalized = this.normalizeSoundSource(this.thinkingSound);
+      if (normalized) {
+        const { source, volume } = normalized;
+        const selectedSound: AudioConfig = { source, volume, probability: 1.0 };
+        // Loop thinking sound while in thinking state (same as ambient)
+        this.thinkingHandle = this.play(selectedSound, typeof source === 'string');
+      }
     } else {
       this.thinkingHandle?.stop();
     }
   };
+  /**
+   * Returns a new AudioFrame holding `frame`'s samples multiplied by `volume`.
+   *
+   * The frame data is viewed as 16-bit signed samples, scaled in a Float32Array, then
+   * clamped to [-32768, 32767] and rounded into a freshly allocated Int16Array. The
+   * input frame's buffer is only read, never written. `sampleRate`, `channels` and
+   * `samplesPerChannel` are passed through from the input frame unchanged.
+   *
+   * Note: Python uses numpy, TS uses typed arrays for equivalent logic
+   */
+  private applyVolumeToFrame(frame: AudioFrame, volume: number): AudioFrame {
+    const int16Data = new Int16Array(
+      frame.data.buffer,
+      frame.data.byteOffset,
+      frame.data.byteLength / 2,
+    );
+    const float32Data = new Float32Array(int16Data.length);
+    for (let i = 0; i < int16Data.length; i++) {
+      float32Data[i] = int16Data[i]!;
+    }
+    const volumeFactor = 10 ** Math.log10(volume); // NOTE(review): equals `volume` for volume > 0; volume 0 gives 0, a negative volume gives NaN (samples then store as 0) - confirm whether plain `volume` was intended
+    for (let i = 0; i < float32Data.length; i++) {
+      float32Data[i]! *= volumeFactor;
+    }
+    const outputData = new Int16Array(float32Data.length);
+    for (let i = 0; i < float32Data.length; i++) {
+      const clipped = Math.max(-32768, Math.min(32767, float32Data[i]!)); // clamp to the int16 range before rounding
+      outputData[i] = Math.round(clipped);
+    }
+    return new AudioFrame(outputData, frame.sampleRate, frame.channels, frame.samplesPerChannel);
+  }
   private async playTask({
     playHandle,
     sound,
@@ -395,57 +457,35 @@ export class BackgroundAudioPlayer {
       sound = getBuiltinAudioPath(sound);
     }
+    let audioStream: AsyncIterable<AudioFrame>;
     if (typeof sound === 'string') {
-      sound = loop
+      audioStream = loop
         ? loopAudioFramesFromFile(sound, { abortSignal: signal })
         : audioFramesFromFile(sound, { abortSignal: signal });
+    } else {
+      audioStream = sound;
     }
-    try {
-      for await (const frame of sound) {
+    const applyVolume = this.applyVolumeToFrame.bind(this);
+    async function* genWrapper(): AsyncGenerator<AudioFrame> {
+      for await (const frame of audioStream) {
         if (signal.aborted || playHandle.done()) break;
-        let processedFrame: AudioFrame;
-        if (volume !== 1.0) {
-          const int16Data = new Int16Array(
-            frame.data.buffer,
-            frame.data.byteOffset,
-            frame.data.byteLength / 2,
-          );
-          const float32Data = new Float32Array(int16Data.length);
-          for (let i = 0; i < int16Data.length; i++) {
-            float32Data[i] = int16Data[i]!;
-          }
-          const volumeFactor = 10 ** Math.log10(volume);
-          for (let i = 0; i < float32Data.length; i++) {
-            float32Data[i]! *= volumeFactor;
-          }
-          const outputData = new Int16Array(float32Data.length);
-          for (let i = 0; i < float32Data.length; i++) {
-            const clipped = Math.max(-32768, Math.min(32767, float32Data[i]!));
-            outputData[i] = Math.round(clipped);
-          }
-          processedFrame = new AudioFrame(
-            outputData,
-            frame.sampleRate,
-            frame.channels,
-            frame.samplesPerChannel,
-          );
-        } else {
-          processedFrame = frame;
-        }
-        // TODO (Brian): use AudioMixer to add/remove frame streams
-        await this.audioSource.captureFrame(processedFrame);
+        yield volume !== 1.0 ? applyVolume(frame, volume) : frame;
       }
+      playHandle._markPlayoutDone();
+    }
+    const gen = genWrapper();
+    try {
+      this.audioMixer.addStream(gen);
+      await playHandle.waitForPlayout();
     } finally {
-      // TODO: the waitForPlayout() may be innaccurate by 400ms
+      this.audioMixer.removeStream(gen);
       playHandle._markPlayoutDone();
+      if (playHandle.done()) {
+        await gen.return(undefined);
+      }
     }
   }
 }

package/src/voice/index.ts CHANGED Viewed

@@ -6,6 +6,7 @@ export { AgentSession, type AgentSessionOptions } from './agent_session.js';
 export * from './avatar/index.js';
 export * from './background_audio.js';
 export * from './events.js';
+export { type TimedString } from './io.js';
 export * from './report.js';
 export * from './room_io/index.js';
 export { RunContext } from './run_context.js';

package/src/voice/io.ts CHANGED Viewed

@@ -29,6 +29,20 @@ export type TTSNode = (
   modelSettings: ModelSettings,
 ) => Promise<ReadableStream<AudioFrame> | null>;
+/**
+ * A string with timing information for word-level alignment.
+ */
+export interface TimedString {
+  text: string;
+  startTime?: number; // seconds
+  endTime?: number; // seconds
+}
+export interface AudioOutputCapabilities {
+  /** Whether this output supports pause/resume functionality */
+  pause: boolean;
+}
 export abstract class AudioInput {
   protected deferredStream: DeferredReadableStream<AudioFrame> =
     new DeferredReadableStream<AudioFrame>();
@@ -54,12 +68,15 @@ export abstract class AudioOutput extends EventEmitter {
     interrupted: false,
   };
   protected logger = log();
+  protected readonly capabilities: AudioOutputCapabilities;
   constructor(
     public sampleRate?: number,
     protected readonly nextInChain?: AudioOutput,
+    capabilities: AudioOutputCapabilities = { pause: false },
   ) {
     super();
+    this.capabilities = capabilities;
     if (this.nextInChain) {
       this.nextInChain.on(AudioOutput.EVENT_PLAYBACK_FINISHED, (ev: PlaybackFinishedEvent) =>
         this.onPlaybackFinished(ev),
@@ -67,6 +84,13 @@ export abstract class AudioOutput extends EventEmitter {
     }
   }
+  /**
+   * Whether this output and all outputs in the chain support pause/resume.
+   *
+   * True only when this output's own `capabilities.pause` is set and the next output
+   * in the chain also reports `canPause`. When there is no `nextInChain`, the chain
+   * part falls back to `true`, so the result is just this output's own capability.
+   */
+  get canPause(): boolean {
+    return this.capabilities.pause && (this.nextInChain?.canPause ?? true);
+  }
   /**
    * Capture an audio frame for playback, frames can be pushed faster than real-time
    */

package/src/voice/recorder_io/recorder_io.ts CHANGED Viewed

@@ -465,7 +465,7 @@ class RecorderAudioOutput extends AudioOutput {
     audioOutput: AudioOutput,
     writeFn: (buf: AudioFrame[]) => void,
   ) {
-    super(audioOutput.sampleRate, audioOutput);
+    super(audioOutput.sampleRate, audioOutput, { pause: true });
     this.recorderIO = recorderIO;
     this.writeFn = writeFn;
   }

package/src/voice/room_io/_output.ts CHANGED Viewed

@@ -328,7 +328,7 @@ export class ParticipantAudioOutput extends AudioOutput {
   private interruptedFuture: Future<void> = new Future();
   constructor(room: Room, options: AudioOutputOptions) {
-    super(options.sampleRate);
+    super(options.sampleRate, undefined, { pause: true });
     this.room = room;
     this.options = options;
     this.audioSource = new AudioSource(options.sampleRate, options.numChannels);

package/src/voice/transcription/synchronizer.ts CHANGED Viewed

@@ -362,7 +362,7 @@ class SyncedAudioOutput extends AudioOutput {
     public synchronizer: TranscriptionSynchronizer,
     private nextInChainAudio: AudioOutput,
   ) {
-    super(nextInChainAudio.sampleRate, nextInChainAudio);
+    super(nextInChainAudio.sampleRate, nextInChainAudio, { pause: true });
   }
   async captureFrame(frame: AudioFrame): Promise<void> {

package/src/worker.ts CHANGED Viewed

@@ -384,7 +384,7 @@ export class AgentServer {
         try {
           await new Promise((resolve, reject) => {
             this.#session!.on('open', resolve);
-            this.#session!.on('error', (error) => reject(error.message));
+            this.#session!.on('error', (error) => reject(error));
             this.#session!.on('close', (code) => reject(`WebSocket returned ${code}`));
           });
@@ -392,14 +392,10 @@ export class AgentServer {
           this.#logger.debug('connected to LiveKit server');
           await this.#runWS(this.#session);
         } catch (e: unknown) {
-          if (e instanceof Error || e instanceof ErrorEvent) {
-            e = e.message;
-          }
           if (this.#closed) return;
           if (retries >= this.#opts.maxRetry) {
             throw new WorkerError(
-              `failed to connect to LiveKit server after ${retries} attempts: ${e}`,
+              `failed to connect to LiveKit server (${this.#opts.wsURL}) after ${retries} attempts: ${e}`,
             );
           }
@@ -407,7 +403,8 @@ export class AgentServer {
           const delay = Math.min(retries * 2, 10);
           this.#logger.warn(
-            `failed to connect to LiveKit server, retrying in ${delay} seconds: ${e} (${retries}/${this.#opts.maxRetry})`,
+            e,
+            `failed to connect to LiveKit server (${this.#opts.wsURL}), retrying in ${delay} seconds: (${retries}/${this.#opts.maxRetry})`,
           );
           await new Promise((resolve) => setTimeout(resolve, delay * 1000));