@livekit/agents 1.0.12 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,451 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import {
5
+ AudioFrame,
6
+ AudioSource,
7
+ LocalAudioTrack,
8
+ type LocalTrackPublication,
9
+ type Room,
10
+ TrackPublishOptions,
11
+ } from '@livekit/rtc-node';
12
+ import { dirname, join } from 'node:path';
13
+ import { fileURLToPath } from 'node:url';
14
+ import { audioFramesFromFile, loopAudioFramesFromFile } from '../audio.js';
15
+ import { log } from '../log.js';
16
+ import { Future, Task, cancelAndWait } from '../utils.js';
17
+ import type { AgentSession } from './agent_session.js';
18
+ import { AgentSessionEventTypes, type AgentStateChangedEvent } from './events.js';
19
+
20
/**
 * Timeout, in milliseconds, handed to `cancelAndWait` when play tasks and the
 * republish task are cancelled during `BackgroundAudioPlayer.close()`.
 */
const TASK_TIMEOUT_MS = 500;
21
+
22
/**
 * Audio clips that ship with the package.
 *
 * Each member's value is the clip's file name; `getBuiltinAudioPath` joins it
 * onto the package's `resources` directory to obtain a path.
 */
export enum BuiltinAudioClip {
  /** File name `office-ambience.ogg`. */
  OFFICE_AMBIENCE = 'office-ambience.ogg',
  /** File name `keyboard-typing.ogg`. */
  KEYBOARD_TYPING = 'keyboard-typing.ogg',
  /** File name `keyboard-typing2.ogg`. */
  KEYBOARD_TYPING2 = 'keyboard-typing2.ogg',
}
27
+
28
/**
 * Type guard reporting whether `source` is one of the `BuiltinAudioClip` values.
 *
 * @param source - Any accepted sound description (path, clip, frame stream,
 *   config, or list of configs).
 * @returns `true` only when `source` is a string equal to a `BuiltinAudioClip`
 *   member value; every non-string input yields `false`.
 */
export function isBuiltinAudioClip(
  source: AudioSourceType | AudioConfig | AudioConfig[],
): source is BuiltinAudioClip {
  // Configs, config lists and frame streams are never built-in clips.
  if (typeof source !== 'string') {
    return false;
  }

  const knownClips: string[] = Object.values(BuiltinAudioClip);
  return knownClips.includes(source);
}
36
+
37
/**
 * Resolve a built-in clip to a file path.
 *
 * The path is built relative to this module's own location: two directories up
 * from the module's directory, inside `resources`.
 *
 * @param clip - Built-in clip whose enum value is the file name.
 * @returns The joined path to the clip file.
 */
export function getBuiltinAudioPath(clip: BuiltinAudioClip): string {
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  return join(moduleDir, '../../resources', clip);
}
41
+
42
/**
 * A playable sound: a string (treated as a file path unless it matches a
 * `BuiltinAudioClip` value), a `BuiltinAudioClip`, or an async stream of
 * audio frames.
 */
export type AudioSourceType = string | BuiltinAudioClip | AsyncIterable<AudioFrame>;

/**
 * A sound together with its playback settings.
 */
export interface AudioConfig {
  /** The sound to play. */
  source: AudioSourceType;

  /** Volume multiplier applied to each sample; treated as 1.0 when omitted. */
  volume?: number;

  /**
   * Selection weight used when several configs are supplied as a list;
   * treated as 1.0 when omitted.
   */
  probability?: number;
}
49
+
50
/**
 * Construction options for `BackgroundAudioPlayer`.
 */
export interface BackgroundAudioPlayerOptions {
  /**
   * Ambient sound to play continuously in the background.
   * Accepts a file path, a BuiltinAudioClip, an async stream of frames, a
   * single AudioConfig, or a list of AudioConfig to pick from by probability.
   * Sources that resolve to a file path are looped automatically.
   */
  ambientSound?: AudioSourceType | AudioConfig | AudioConfig[];

  /**
   * Sound to play when the agent is thinking.
   * Not yet supported: the player only logs a warning when this is set.
   * TODO (Brian): Implement thinking sound when AudioMixer becomes available
   */
  thinkingSound?: AudioSourceType | AudioConfig | AudioConfig[];

  /**
   * Stream timeout in milliseconds
   * NOTE(review): the player's constructor in this file does not read this
   * option — confirm whether it is consumed elsewhere.
   * @defaultValue 200
   */
  streamTimeoutMs?: number;
}
70
+
71
/**
 * Arguments for `BackgroundAudioPlayer.start()`.
 */
export interface BackgroundAudioStartOptions {
  /**
   * Room whose local participant publishes the background audio track; the
   * player also listens for this room's `reconnected` event.
   */
  room: Room;

  /** Optional session whose agent-state-changed events the player subscribes to. */
  agentSession?: AgentSession;

  /**
   * Options used when publishing the track; a fresh `TrackPublishOptions`
   * instance is used when omitted.
   */
  trackPublishOptions?: TrackPublishOptions;
}
76
+
77
/**
 * Queue size, in milliseconds, of the AudioSource buffer (400ms).
 * Deliberately small so that removing a sound does not cut off abruptly.
 */
const AUDIO_SOURCE_BUFFER_MS = 400;
80
+
81
/**
 * Handle for a single playback started by `BackgroundAudioPlayer.play()`.
 *
 * It tracks two one-shot signals: "stop was requested" and "playout finished".
 */
export class PlayHandle {
  private doneFuture = new Future<void>();
  private stopFuture = new Future<void>();

  /** @returns `true` once playout has been marked as finished. */
  done(): boolean {
    return this.doneFuture.done;
  }

  /**
   * Request that playback stop. Does nothing if playout already finished;
   * otherwise records the stop request and marks playout as finished.
   */
  stop(): void {
    if (!this.done()) {
      if (!this.stopFuture.done) {
        this.stopFuture.resolve();
      }
      this._markPlayoutDone();
    }
  }

  /** @returns A promise that settles when playout is marked as finished. */
  async waitForPlayout(): Promise<void> {
    return this.doneFuture.await;
  }

  /** Mark playout as finished; repeated calls are no-ops. */
  _markPlayoutDone(): void {
    if (this.doneFuture.done) {
      return;
    }
    this.doneFuture.resolve();
  }
}
109
+
110
/**
 * Manages background audio playback for LiveKit agent sessions
 *
 * This class handles playing ambient sounds and manages audio track publishing.
 * It supports:
 * - Continuous ambient sound playback with looping
 * - Volume control and probability-based sound selection
 * - Integration with LiveKit rooms and agent sessions
 *
 * Note: Thinking sound not yet supported
 *
 * @example
 * ```typescript
 * const player = new BackgroundAudioPlayer({
 *   ambientSound: { source: BuiltinAudioClip.OFFICE_AMBIENCE, volume: 0.8 },
 * });
 *
 * await player.start({ room, agentSession });
 * ```
 */
export class BackgroundAudioPlayer {
  private ambientSound?: AudioSourceType | AudioConfig | AudioConfig[];
  private thinkingSound?: AudioSourceType | AudioConfig | AudioConfig[];

  // Tasks for playbacks that are currently running; entries remove themselves on completion.
  private playTasks: Task<void>[] = [];
  // Single source every playback writes into.
  // NOTE(review): 48000 / 1 are presumably sample rate (Hz) and channel count — confirm against AudioSource.
  private audioSource = new AudioSource(48000, 1, AUDIO_SOURCE_BUFFER_MS);

  private room?: Room;
  private agentSession?: AgentSession;
  private publication?: LocalTrackPublication;
  private trackPublishOptions?: TrackPublishOptions;
  private republishTask?: Task<void>;

  private ambientHandle?: PlayHandle;
  private thinkingHandle?: PlayHandle;

  // TODO (Brian): add lock

  #logger = log();

  /**
   * @param options - Optional sounds to use. `thinkingSound` is stored but not
   *   played yet; a warning is logged when it is provided.
   */
  constructor(options?: BackgroundAudioPlayerOptions) {
    const { ambientSound, thinkingSound } = options ?? {};

    this.ambientSound = ambientSound;
    this.thinkingSound = thinkingSound;

    if (this.thinkingSound) {
      this.#logger.warn('thinkingSound is not yet supported');
      // TODO: Implement thinking sound when AudioMixer becomes available
    }
  }

  /**
   * Select a sound from a list of background sounds based on probability weights.
   *
   * A missing `probability` counts as 1.0. When the weights sum to less than
   * 1.0, the remainder is the chance that no sound is selected; when they sum
   * to more than 1.0 they are normalized by the total.
   *
   * @returns The chosen config, or `undefined` when nothing is selected.
   */
  private selectSoundFromList(sounds: AudioConfig[]): AudioConfig | undefined {
    const totalProbability = sounds.reduce((sum, sound) => sum + (sound.probability ?? 1.0), 0);

    if (totalProbability <= 0) {
      return undefined;
    }

    if (totalProbability < 1.0 && Math.random() > totalProbability) {
      return undefined;
    }

    const normalizeFactor = totalProbability <= 1.0 ? 1.0 : totalProbability;
    const r = Math.random() * Math.min(totalProbability, 1.0);
    let cumulative = 0.0;
    let lastEligible: AudioConfig | undefined;

    for (const sound of sounds) {
      const prob = sound.probability ?? 1.0;
      if (prob <= 0) {
        continue;
      }

      lastEligible = sound;
      cumulative += prob / normalizeFactor;

      if (r <= cumulative) {
        return sound;
      }
    }

    // Only reachable through floating-point rounding of the cumulative sum.
    // Fall back to the last sound that was actually eligible, never to one
    // that was excluded for having a non-positive probability.
    return lastEligible;
  }

  /**
   * Reduce any accepted sound description to a single `{ source, volume }`.
   *
   * Built-in clips are resolved to file paths in every branch; `volume`
   * defaults to 1.0. For a list, one entry is drawn with
   * `selectSoundFromList`.
   *
   * @returns The resolved sound, or `undefined` when `source` is `undefined`
   *   or the list draw selected nothing.
   */
  private normalizeSoundSource(
    source?: AudioSourceType | AudioConfig | AudioConfig[],
  ): { source: AudioSourceType; volume: number } | undefined {
    if (source === undefined) {
      return undefined;
    }

    if (typeof source === 'string') {
      return {
        source: this.normalizeBuiltinAudio(source),
        volume: 1.0,
      };
    }

    if (Array.isArray(source)) {
      const selected = this.selectSoundFromList(source);
      if (selected === undefined) {
        return undefined;
      }

      return {
        // Resolve built-in clips here too, matching the single-config branch.
        source: this.normalizeBuiltinAudio(selected.source),
        volume: selected.volume ?? 1.0,
      };
    }

    if (typeof source === 'object' && 'source' in source) {
      return {
        source: this.normalizeBuiltinAudio(source.source),
        volume: source.volume ?? 1.0,
      };
    }

    // Remaining case: a raw async stream of frames.
    return { source, volume: 1.0 };
  }

  /** Map a built-in clip to its file path; pass every other source through unchanged. */
  private normalizeBuiltinAudio(source: AudioSourceType): AudioSourceType {
    if (isBuiltinAudioClip(source)) {
      return getBuiltinAudioPath(source);
    }
    return source;
  }

  /**
   * Start playing a sound into the background audio source.
   *
   * @param audio - Sound description; a list is resolved to one entry (or none) by probability.
   * @param loop - When `true` and the sound resolves to a file path, the file is looped.
   * @returns A handle for the playback. If nothing was selected, the handle is
   *   already marked as done.
   */
  play(audio: AudioSourceType | AudioConfig | AudioConfig[], loop = false): PlayHandle {
    const normalized = this.normalizeSoundSource(audio);
    if (normalized === undefined) {
      const handle = new PlayHandle();
      handle._markPlayoutDone();
      return handle;
    }

    const { source, volume } = normalized;
    const playHandle = new PlayHandle();

    const task = Task.from(async ({ signal }) => {
      await this.playTask({ playHandle, sound: source, volume, loop, signal });
    });

    task.addDoneCallback(() => {
      playHandle._markPlayoutDone();
      // Guard the lookup: splice(-1, 1) would remove an unrelated task from
      // the end of the list if this task is not (or no longer) tracked.
      const index = this.playTasks.indexOf(task);
      if (index !== -1) {
        this.playTasks.splice(index, 1);
      }
    });

    this.playTasks.push(task);
    return playHandle;
  }

  /**
   * Start the background audio system, publishing the audio track
   * and beginning playback of any configured ambient sound.
   *
   * If `ambientSound` resolves to a file path it is looped automatically.
   * If it is an async stream of frames it is played as-is and is assumed
   * to be already infinite or looped.
   *
   * @param options - Options for starting background audio playback
   * @throws Error when the room has no local participant to publish with.
   */
  async start(options: BackgroundAudioStartOptions): Promise<void> {
    const { room, agentSession, trackPublishOptions } = options;
    this.room = room;
    this.agentSession = agentSession;
    this.trackPublishOptions = trackPublishOptions;

    await this.publishTrack();

    // TODO (Brian): check job context is not fake

    // TODO (Brian): start audio mixer task
    this.room.on('reconnected', this.onReconnected);

    this.agentSession?.on(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);

    if (!this.ambientSound) return;

    const normalized = this.normalizeSoundSource(this.ambientSound);
    if (!normalized) return;

    const { source, volume } = normalized;
    const selectedSound: AudioConfig = { source, volume, probability: 1.0 };
    // Only file-backed sounds (string paths) are looped.
    this.ambientHandle = this.play(selectedSound, typeof source === 'string');
  }

  /**
   * Close and cleanup the background audio system: cancel playbacks and any
   * pending republish, close the audio source, detach event listeners, and
   * unpublish the track if one was published.
   */
  async close(): Promise<void> {
    await cancelAndWait(this.playTasks, TASK_TIMEOUT_MS);

    if (this.republishTask) {
      await this.republishTask.cancelAndWait(TASK_TIMEOUT_MS);
    }

    // TODO (Brian): cancel audio mixer task and close audio mixer

    await this.audioSource.close();

    this.agentSession?.off(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);
    this.room?.off('reconnected', this.onReconnected);

    if (this.publication && this.publication.sid) {
      await this.room?.localParticipant?.unpublishTrack(this.publication.sid);
    }
  }

  /**
   * Get the current track publication
   *
   * @returns The publication, or `undefined` if no track is currently published.
   */
  getPublication(): LocalTrackPublication | undefined {
    return this.publication;
  }

  /**
   * Publish the background audio track through the room's local participant.
   * Does nothing when a publication is already recorded.
   *
   * @throws Error when the local participant is not available.
   */
  private async publishTrack(): Promise<void> {
    if (this.publication !== undefined) {
      return;
    }

    const track = LocalAudioTrack.createAudioTrack('background_audio', this.audioSource);

    if (this.room?.localParticipant === undefined) {
      throw new Error('Local participant not available');
    }

    const publication = await this.room.localParticipant.publishTrack(
      track,
      this.trackPublishOptions ?? new TrackPublishOptions(),
    );

    this.publication = publication;
    this.#logger.debug(`Background audio track published: ${this.publication.sid}`);
  }

  /**
   * Room `reconnected` handler: drop the recorded publication and publish the
   * track again in a fresh task, cancelling any republish still in flight.
   */
  private onReconnected = (): void => {
    if (this.republishTask) {
      this.republishTask.cancel();
    }

    this.publication = undefined;
    this.republishTask = Task.from(async () => {
      await this.republishTrackTask();
    });
  };

  /** Body of the republish task started by `onReconnected`. */
  private async republishTrackTask(): Promise<void> {
    // TODO (Brian): add lock protection when implementing lock
    await this.publishTrack();
  }

  /**
   * Agent state handler. Currently only stops an active thinking handle when
   * the agent leaves the `thinking` state; starting the sound is not implemented.
   */
  private onAgentStateChanged = (ev: AgentStateChangedEvent): void => {
    if (!this.thinkingSound) {
      return;
    }

    if (ev.newState === 'thinking') {
      if (this.thinkingHandle && !this.thinkingHandle.done()) {
        return;
      }

      // TODO (Brian): play thinking sound and assign to thinkingHandle
    } else {
      this.thinkingHandle?.stop();
    }
  };

  /**
   * Return a copy of `frame` with every 16-bit sample multiplied by `gain`,
   * clipped to the int16 range and rounded to the nearest integer.
   */
  private scaleFrame(frame: AudioFrame, gain: number): AudioFrame {
    const samples = new Int16Array(
      frame.data.buffer,
      frame.data.byteOffset,
      frame.data.byteLength / 2,
    );
    const scaled = new Int16Array(samples.length);

    for (let i = 0; i < samples.length; i++) {
      const clipped = Math.max(-32768, Math.min(32767, samples[i]! * gain));
      scaled[i] = Math.round(clipped);
    }

    return new AudioFrame(scaled, frame.sampleRate, frame.channels, frame.samplesPerChannel);
  }

  /**
   * Stream one sound into the audio source until it ends, the task is
   * aborted, or the handle is stopped. The handle is always marked as done
   * when this returns or throws.
   */
  private async playTask({
    playHandle,
    sound,
    volume,
    loop,
    signal,
  }: {
    playHandle: PlayHandle;
    sound: AudioSourceType;
    volume: number;
    loop: boolean;
    signal: AbortSignal;
  }): Promise<void> {
    let resolved: AudioSourceType = sound;
    if (isBuiltinAudioClip(resolved)) {
      resolved = getBuiltinAudioPath(resolved);
    }

    let frames: AsyncIterable<AudioFrame>;
    if (typeof resolved === 'string') {
      frames = loop
        ? loopAudioFramesFromFile(resolved, { abortSignal: signal })
        : audioFramesFromFile(resolved, { abortSignal: signal });
    } else {
      frames = resolved;
    }

    // Linear gain applied to each sample. Non-positive or NaN volumes yield
    // silence, which is what the previous `10 ** Math.log10(volume)` produced.
    const gain = volume > 0 ? volume : 0;

    try {
      for await (const frame of frames) {
        if (signal.aborted || playHandle.done()) break;

        // Unity volume passes the frame through untouched.
        const processedFrame = volume !== 1.0 ? this.scaleFrame(frame, gain) : frame;

        // TODO (Brian): use AudioMixer to add/remove frame streams
        await this.audioSource.captureFrame(processedFrame);
      }
    } finally {
      // TODO: the waitForPlayout() may be inaccurate by 400ms
      playHandle._markPlayoutDone();
    }
  }
}
@@ -3,8 +3,8 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  export { Agent, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
5
5
  export { AgentSession, type AgentSessionOptions } from './agent_session.js';
6
-
7
6
  export * from './avatar/index.js';
7
+ export * from './background_audio.js';
8
8
  export * from './events.js';
9
9
  export * from './room_io/index.js';
10
10
  export { RunContext } from './run_context.js';