@livekit/agents 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +26 -0
  3. package/dist/audio.d.ts +1 -4
  4. package/dist/audio.d.ts.map +1 -1
  5. package/dist/audio.js +28 -11
  6. package/dist/audio.js.map +1 -1
  7. package/dist/cli.d.ts +1 -1
  8. package/dist/cli.d.ts.map +1 -1
  9. package/dist/cli.js +36 -13
  10. package/dist/cli.js.map +1 -1
  11. package/dist/generator.d.ts +5 -0
  12. package/dist/generator.d.ts.map +1 -1
  13. package/dist/generator.js +11 -0
  14. package/dist/generator.js.map +1 -1
  15. package/dist/http_server.d.ts.map +1 -1
  16. package/dist/http_server.js +5 -0
  17. package/dist/http_server.js.map +1 -1
  18. package/dist/index.d.ts +3 -1
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +3 -1
  21. package/dist/index.js.map +1 -1
  22. package/dist/ipc/job_main.js +9 -1
  23. package/dist/ipc/job_main.js.map +1 -1
  24. package/dist/ipc/proc_pool.d.ts.map +1 -1
  25. package/dist/ipc/proc_pool.js +1 -0
  26. package/dist/ipc/proc_pool.js.map +1 -1
  27. package/dist/job.d.ts +1 -0
  28. package/dist/job.d.ts.map +1 -1
  29. package/dist/job.js +17 -1
  30. package/dist/job.js.map +1 -1
  31. package/dist/multimodal/agent_playout.d.ts +34 -0
  32. package/dist/multimodal/agent_playout.d.ts.map +1 -0
  33. package/dist/multimodal/agent_playout.js +221 -0
  34. package/dist/multimodal/agent_playout.js.map +1 -0
  35. package/dist/multimodal/index.d.ts +3 -0
  36. package/dist/multimodal/index.d.ts.map +1 -0
  37. package/dist/multimodal/index.js +6 -0
  38. package/dist/multimodal/index.js.map +1 -0
  39. package/dist/multimodal/multimodal_agent.d.ts +47 -0
  40. package/dist/multimodal/multimodal_agent.d.ts.map +1 -0
  41. package/dist/multimodal/multimodal_agent.js +331 -0
  42. package/dist/multimodal/multimodal_agent.js.map +1 -0
  43. package/dist/transcription.d.ts +22 -0
  44. package/dist/transcription.d.ts.map +1 -0
  45. package/dist/transcription.js +111 -0
  46. package/dist/transcription.js.map +1 -0
  47. package/dist/utils.d.ts +27 -0
  48. package/dist/utils.d.ts.map +1 -1
  49. package/dist/utils.js +107 -9
  50. package/dist/utils.js.map +1 -1
  51. package/dist/worker.d.ts +3 -1
  52. package/dist/worker.d.ts.map +1 -1
  53. package/dist/worker.js +44 -8
  54. package/dist/worker.js.map +1 -1
  55. package/package.json +6 -4
  56. package/src/audio.ts +19 -19
  57. package/src/cli.ts +37 -13
  58. package/src/generator.ts +14 -0
  59. package/src/http_server.ts +5 -0
  60. package/src/index.ts +3 -1
  61. package/src/ipc/job_main.ts +9 -2
  62. package/src/ipc/proc_pool.ts +1 -0
  63. package/src/job.ts +21 -1
  64. package/src/multimodal/agent_playout.ts +254 -0
  65. package/src/multimodal/index.ts +5 -0
  66. package/src/multimodal/multimodal_agent.ts +428 -0
  67. package/src/transcription.ts +128 -0
  68. package/src/utils.ts +138 -6
  69. package/src/worker.ts +54 -10
  70. package/tsconfig.json +1 -1
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@livekit/agents",
3
- "version": "0.2.0",
4
- "description": "LiveKit Node Agents",
3
+ "version": "0.3.0",
4
+ "description": "LiveKit Agents - Node.js",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
- "author": "aoife cassidy <aoife@livekit.io>",
7
+ "author": "LiveKit",
8
8
  "type": "module",
9
9
  "devDependencies": {
10
10
  "@microsoft/api-extractor": "^7.35.0",
@@ -14,7 +14,7 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "@livekit/protocol": "^1.21.0",
17
- "@livekit/rtc-node": "^0.8.1",
17
+ "@livekit/rtc-node": "^0.9.0",
18
18
  "commander": "^12.0.0",
19
19
  "livekit-server-sdk": "^2.6.1",
20
20
  "pino": "^8.19.0",
@@ -24,6 +24,8 @@
24
24
  },
25
25
  "scripts": {
26
26
  "build": "tsc",
27
+ "clean": "rm -rf dist",
28
+ "clean:build": "pnpm clean && pnpm build",
27
29
  "lint": "eslint -f unix \"src/**/*.ts\"",
28
30
  "api:check": "api-extractor run --typescript-compiler-folder ../node_modules/typescript",
29
31
  "api:update": "api-extractor run --local --typescript-compiler-folder ../node_modules/typescript --verbose"
package/src/audio.ts CHANGED
@@ -6,36 +6,36 @@ import { log } from './log.js';
6
6
 
7
7
  /** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. */
8
8
  export class AudioByteStream {
9
- private sampleRate: number;
10
- private numChannels: number;
11
- private bytesPerFrame: number;
12
- private buf: Int8Array;
9
+ #sampleRate: number;
10
+ #numChannels: number;
11
+ #bytesPerFrame: number;
12
+ #buf: Int8Array;
13
13
 
14
14
  constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {
15
- this.sampleRate = sampleRate;
16
- this.numChannels = numChannels;
15
+ this.#sampleRate = sampleRate;
16
+ this.#numChannels = numChannels;
17
17
 
18
18
  if (samplesPerChannel === null) {
19
19
  samplesPerChannel = Math.floor(sampleRate / 50); // 20ms by default
20
20
  }
21
21
 
22
- this.bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)
23
- this.buf = new Int8Array();
22
+ this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)
23
+ this.#buf = new Int8Array();
24
24
  }
25
25
 
26
26
  write(data: ArrayBuffer): AudioFrame[] {
27
- this.buf = new Int8Array([...this.buf, ...new Int8Array(data)]);
27
+ this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);
28
28
 
29
29
  const frames: AudioFrame[] = [];
30
- while (this.buf.length >= this.bytesPerFrame) {
31
- const frameData = this.buf.slice(0, this.bytesPerFrame);
32
- this.buf = this.buf.slice(this.bytesPerFrame);
30
+ while (this.#buf.length >= this.#bytesPerFrame) {
31
+ const frameData = this.#buf.slice(0, this.#bytesPerFrame);
32
+ this.#buf = this.#buf.slice(this.#bytesPerFrame);
33
33
 
34
34
  frames.push(
35
35
  new AudioFrame(
36
36
  new Int16Array(frameData.buffer),
37
- this.sampleRate,
38
- this.numChannels,
37
+ this.#sampleRate,
38
+ this.#numChannels,
39
39
  frameData.length / 2,
40
40
  ),
41
41
  );
@@ -45,17 +45,17 @@ export class AudioByteStream {
45
45
  }
46
46
 
47
47
  flush(): AudioFrame[] {
48
- if (this.buf.length % (2 * this.numChannels) !== 0) {
48
+ if (this.#buf.length % (2 * this.#numChannels) !== 0) {
49
49
  log().warn('AudioByteStream: incomplete frame during flush, dropping');
50
50
  return [];
51
51
  }
52
52
 
53
53
  return [
54
54
  new AudioFrame(
55
- new Int16Array(this.buf.buffer),
56
- this.sampleRate,
57
- this.numChannels,
58
- this.buf.length / 2,
55
+ new Int16Array(this.#buf.buffer),
56
+ this.#sampleRate,
57
+ this.#numChannels,
58
+ this.#buf.length / 2,
59
59
  ),
60
60
  ];
61
61
  }
package/src/cli.ts CHANGED
@@ -5,7 +5,7 @@ import { Command, Option } from 'commander';
5
5
  import type { EventEmitter } from 'events';
6
6
  import { initializeLogger, log } from './log.js';
7
7
  import { version } from './version.js';
8
- import { Worker, type WorkerOptions } from './worker.js';
8
+ import { Worker, WorkerOptions } from './worker.js';
9
9
 
10
10
  type CliArgs = {
11
11
  opts: WorkerOptions;
@@ -18,7 +18,10 @@ type CliArgs = {
18
18
 
19
19
  const runWorker = async (args: CliArgs) => {
20
20
  initializeLogger({ pretty: !args.production, level: args.opts.logLevel });
21
- const worker = new Worker(args.opts);
21
+
22
+ // though `production` is defined in WorkerOptions, it will always be overridden by CLI.
23
+ const { production: _, ...opts } = args.opts; // eslint-disable-line @typescript-eslint/no-unused-vars
24
+ const worker = new Worker(new WorkerOptions({ production: args.production, ...opts }));
22
25
 
23
26
  if (args.room) {
24
27
  worker.event.once('worker_registered', () => {
@@ -72,20 +75,29 @@ export const runApp = (opts: WorkerOptions) => {
72
75
  .env('LOG_LEVEL'),
73
76
  )
74
77
  .addOption(
75
- new Option('--url <string>', 'LiveKit server or Cloud project websocket URL')
76
- .makeOptionMandatory(true)
77
- .env('LIVEKIT_URL'),
78
+ new Option('--url <string>', 'LiveKit server or Cloud project websocket URL').env(
79
+ 'LIVEKIT_URL',
80
+ ),
78
81
  )
79
82
  .addOption(
80
- new Option('--api-key <string>', "LiveKit server or Cloud project's API key")
81
- .makeOptionMandatory(true)
82
- .env('LIVEKIT_API_KEY'),
83
+ new Option('--api-key <string>', "LiveKit server or Cloud project's API key").env(
84
+ 'LIVEKIT_API_KEY',
85
+ ),
83
86
  )
84
87
  .addOption(
85
- new Option('--api-secret <string>', "LiveKit server or Cloud project's API secret")
86
- .makeOptionMandatory(true)
87
- .env('LIVEKIT_API_SECRET'),
88
- );
88
+ new Option('--api-secret <string>', "LiveKit server or Cloud project's API secret").env(
89
+ 'LIVEKIT_API_SECRET',
90
+ ),
91
+ )
92
+ .action(() => {
93
+ if (
94
+ // do not run CLI if origin file is agents/ipc/job_main.js
95
+ process.argv[1] !== new URL('ipc/job_main.js', import.meta.url).pathname ||
96
+ process.argv.length < 3
97
+ ) {
98
+ program.help();
99
+ }
100
+ });
89
101
 
90
102
  program
91
103
  .command('start')
@@ -106,6 +118,12 @@ export const runApp = (opts: WorkerOptions) => {
106
118
  program
107
119
  .command('dev')
108
120
  .description('Start the worker in development mode')
121
+ .addOption(
122
+ new Option('--log-level <level>', 'Set the logging level')
123
+ .choices(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
124
+ .default('debug')
125
+ .env('LOG_LEVEL'),
126
+ )
109
127
  .action(() => {
110
128
  const options = program.optsWithGlobals();
111
129
  opts.wsURL = options.url || opts.wsURL;
@@ -123,7 +141,13 @@ export const runApp = (opts: WorkerOptions) => {
123
141
  .command('connect')
124
142
  .description('Connect to a specific room')
125
143
  .requiredOption('--room <string>', 'Room name to connect to')
126
- .option('--participant-identity <string>', 'Participant identitiy to connect as')
144
+ .option('--participant-identity <string>', 'Identity of user to listen to')
145
+ .addOption(
146
+ new Option('--log-level <level>', 'Set the logging level')
147
+ .choices(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
148
+ .default('debug')
149
+ .env('LOG_LEVEL'),
150
+ )
127
151
  .action((...[, command]) => {
128
152
  const options = command.optsWithGlobals();
129
153
  opts.wsURL = options.url || opts.wsURL;
package/src/generator.ts CHANGED
@@ -9,6 +9,20 @@ export interface Agent {
9
9
  prewarm?: (proc: JobProcess) => unknown;
10
10
  }
11
11
 
12
+ /** Helper to check if an object is an agent before running it.
13
+ *
14
+ * @internal
15
+ */
16
+ export function isAgent(obj: unknown): obj is Agent {
17
+ return (
18
+ typeof obj === 'object' &&
19
+ obj !== null &&
20
+ 'entry' in obj &&
21
+ typeof (obj as Agent).entry === 'function' &&
22
+ (('prewarm' in obj && typeof (obj as Agent).prewarm === 'function') || !('prewarm' in obj))
23
+ );
24
+ }
25
+
12
26
  /**
13
27
  * Helper to define an agent according to the required interface.
14
28
  * @example A basic agent with entry and prewarm functions
package/src/http_server.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { type IncomingMessage, type Server, type ServerResponse, createServer } from 'http';
5
+ import { log } from './log.js';
5
6
 
6
7
  const healthCheck = async (res: ServerResponse) => {
7
8
  res.writeHead(200);
@@ -31,6 +32,10 @@ export class HTTPServer {
31
32
  return new Promise((resolve, reject) => {
32
33
  this.app.listen(this.port, this.host, (err?: Error) => {
33
34
  if (err) reject(err);
35
+ const address = this.app.address();
36
+ if (typeof address! !== 'string') {
37
+ log().info(`Server is listening on port ${address!.port}`);
38
+ }
34
39
  resolve();
35
40
  });
36
41
  });
package/src/index.ts CHANGED
@@ -11,6 +11,7 @@
11
11
  */
12
12
  import * as cli from './cli.js';
13
13
  import * as llm from './llm/index.js';
14
+ import * as multimodal from './multimodal/index.js';
14
15
  import * as stt from './stt/index.js';
15
16
  import * as tts from './tts/index.js';
16
17
 
@@ -24,5 +25,6 @@ export * from './log.js';
24
25
  export * from './generator.js';
25
26
  export * from './tokenize.js';
26
27
  export * from './audio.js';
28
+ export * from './transcription.js';
27
29
 
28
- export { cli, stt, tts, llm };
30
+ export { cli, stt, tts, llm, multimodal };
package/src/ipc/job_main.ts CHANGED
@@ -7,7 +7,7 @@ import { fork } from 'child_process';
7
7
  import { EventEmitter, once } from 'events';
8
8
  import type { Logger } from 'pino';
9
9
  import { fileURLToPath } from 'url';
10
- import type { Agent } from '../generator.js';
10
+ import { type Agent, isAgent } from '../generator.js';
11
11
  import type { RunningJobInfo } from '../job.js';
12
12
  import { JobContext } from '../job.js';
13
13
  import { JobProcess } from '../job.js';
@@ -93,7 +93,14 @@ if (process.send) {
93
93
  // [0] `node'
94
94
  // [1] import.meta.filename
95
95
  // [2] import.meta.filename of function containing entry file
96
- const agent: Agent = await import(process.argv[2]).then((agent) => agent.default);
96
+ const moduleFile = process.argv[2];
97
+ const agent: Agent = await import(moduleFile).then((module) => {
98
+ const agent = module.default;
99
+ if (agent === undefined || !isAgent(agent)) {
100
+ throw new Error(`Unable to load agent: Missing or invalid default export in ${moduleFile}`);
101
+ }
102
+ return agent;
103
+ });
97
104
  if (!agent.prewarm) {
98
105
  agent.prewarm = defaultInitializeProcessFunc;
99
106
  }
package/src/ipc/proc_pool.ts CHANGED
@@ -102,6 +102,7 @@ export class ProcPool {
102
102
  }
103
103
  this.closed = true;
104
104
  this.controller.abort();
105
+ this.warmedProcQueue.items.forEach((e) => e.close());
105
106
  this.executors.forEach((e) => e.close());
106
107
  await Promise.allSettled(this.tasks);
107
108
  }
package/src/job.ts CHANGED
@@ -9,7 +9,7 @@ import type {
9
9
  Room,
10
10
  RtcConfiguration,
11
11
  } from '@livekit/rtc-node';
12
- import { RoomEvent, TrackKind } from '@livekit/rtc-node';
12
+ import { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';
13
13
  import type { Logger } from 'pino';
14
14
  import { log } from './log.js';
15
15
 
@@ -100,6 +100,26 @@ export class JobContext {
100
100
  this.shutdownCallbacks.push(callback);
101
101
  }
102
102
 
103
+ async waitForParticipant(identity?: string): Promise<RemoteParticipant> {
104
+ if (!this.#room.isConnected) {
105
+ throw new Error('room is not connected');
106
+ }
107
+
108
+ console.log(this.#room.remoteParticipants.values());
109
+
110
+ for (const p of this.#room.remoteParticipants.values()) {
111
+ if ((!identity || p.identity === identity) && p.info.kind != ParticipantKind.AGENT) {
112
+ return p;
113
+ }
114
+ }
115
+
116
+ return new Promise((resolve) => {
117
+ this.#room.once(RoomEvent.ParticipantConnected, () => {
118
+ resolve(this.#room.remoteParticipants.values().next().value);
119
+ });
120
+ });
121
+ }
122
+
103
123
  /**
104
124
  * Connects the agent to the room.
105
125
  *
package/src/multimodal/agent_playout.ts ADDED
@@ -0,0 +1,254 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { AudioFrame } from '@livekit/rtc-node';
5
+ import { type AudioSource } from '@livekit/rtc-node';
6
+ import { EventEmitter } from 'events';
7
+ import { AudioByteStream } from '../audio.js';
8
+ import type { TranscriptionForwarder } from '../transcription.js';
9
+ import { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
10
+
11
+ export const proto = {};
12
+
13
+ export class PlayoutHandle extends EventEmitter {
14
+ #audioSource: AudioSource;
15
+ #sampleRate: number;
16
+ #itemId: string;
17
+ #contentIndex: number;
18
+ /** @internal */
19
+ transcriptionFwd: TranscriptionForwarder;
20
+ /** @internal */
21
+ doneFut: Future;
22
+ /** @internal */
23
+ intFut: Future;
24
+ /** @internal */
25
+ #interrupted: boolean;
26
+ /** @internal */
27
+ pushedDuration: number;
28
+ /** @internal */
29
+ totalPlayedTime: number | undefined; // Set when playout is done
30
+
31
+ constructor(
32
+ audioSource: AudioSource,
33
+ sampleRate: number,
34
+ itemId: string,
35
+ contentIndex: number,
36
+ transcriptionFwd: TranscriptionForwarder,
37
+ ) {
38
+ super();
39
+ this.#audioSource = audioSource;
40
+ this.#sampleRate = sampleRate;
41
+ this.#itemId = itemId;
42
+ this.#contentIndex = contentIndex;
43
+ this.transcriptionFwd = transcriptionFwd;
44
+ this.doneFut = new Future();
45
+ this.intFut = new Future();
46
+ this.#interrupted = false;
47
+ this.pushedDuration = 0;
48
+ this.totalPlayedTime = undefined;
49
+ }
50
+
51
+ get itemId(): string {
52
+ return this.#itemId;
53
+ }
54
+
55
+ get audioSamples(): number {
56
+ if (this.totalPlayedTime !== undefined) {
57
+ return Math.floor(this.totalPlayedTime * this.#sampleRate);
58
+ }
59
+
60
+ return Math.floor(this.pushedDuration - this.#audioSource.queuedDuration * this.#sampleRate);
61
+ }
62
+
63
+ get textChars(): number {
64
+ return this.transcriptionFwd.currentCharacterIndex;
65
+ }
66
+
67
+ get contentIndex(): number {
68
+ return this.#contentIndex;
69
+ }
70
+
71
+ get interrupted(): boolean {
72
+ return this.#interrupted;
73
+ }
74
+
75
+ get done(): boolean {
76
+ return this.doneFut.done || this.#interrupted;
77
+ }
78
+
79
+ interrupt() {
80
+ if (this.doneFut.done) return;
81
+ this.intFut.resolve();
82
+ this.#interrupted = true;
83
+ }
84
+ }
85
+
86
+ export class AgentPlayout {
87
+ #audioSource: AudioSource;
88
+ #playoutTask: CancellablePromise<void> | null;
89
+ #sampleRate: number;
90
+ #numChannels: number;
91
+ #inFrameSize: number;
92
+ #outFrameSize: number;
93
+ constructor(
94
+ audioSource: AudioSource,
95
+ sampleRate: number,
96
+ numChannels: number,
97
+ inFrameSize: number,
98
+ outFrameSize: number,
99
+ ) {
100
+ this.#audioSource = audioSource;
101
+ this.#playoutTask = null;
102
+ this.#sampleRate = sampleRate;
103
+ this.#numChannels = numChannels;
104
+ this.#inFrameSize = inFrameSize;
105
+ this.#outFrameSize = outFrameSize;
106
+ }
107
+
108
+ play(
109
+ itemId: string,
110
+ contentIndex: number,
111
+ transcriptionFwd: TranscriptionForwarder,
112
+ textStream: AsyncIterableQueue<string>,
113
+ audioStream: AsyncIterableQueue<AudioFrame>,
114
+ ): PlayoutHandle {
115
+ const handle = new PlayoutHandle(
116
+ this.#audioSource,
117
+ this.#sampleRate,
118
+ itemId,
119
+ contentIndex,
120
+ transcriptionFwd,
121
+ );
122
+ this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);
123
+ return handle;
124
+ }
125
+
126
+ #makePlayoutTask(
127
+ oldTask: CancellablePromise<void> | null,
128
+ handle: PlayoutHandle,
129
+ textStream: AsyncIterableQueue<string>,
130
+ audioStream: AsyncIterableQueue<AudioFrame>,
131
+ ): CancellablePromise<void> {
132
+ return new CancellablePromise<void>((resolve, reject, onCancel) => {
133
+ let cancelled = false;
134
+ onCancel(() => {
135
+ cancelled = true;
136
+ });
137
+
138
+ (async () => {
139
+ try {
140
+ if (oldTask) {
141
+ await gracefullyCancel(oldTask);
142
+ }
143
+
144
+ let firstFrame = true;
145
+
146
+ const readText = () =>
147
+ new CancellablePromise<void>((resolveText, rejectText, onCancelText) => {
148
+ let cancelledText = false;
149
+ onCancelText(() => {
150
+ cancelledText = true;
151
+ });
152
+
153
+ (async () => {
154
+ try {
155
+ for await (const text of textStream) {
156
+ if (cancelledText || cancelled) {
157
+ break;
158
+ }
159
+ handle.transcriptionFwd.pushText(text);
160
+ }
161
+ resolveText();
162
+ } catch (error) {
163
+ rejectText(error);
164
+ }
165
+ })();
166
+ });
167
+
168
+ const capture = () =>
169
+ new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => {
170
+ let cancelledCapture = false;
171
+ onCancelCapture(() => {
172
+ cancelledCapture = true;
173
+ });
174
+
175
+ (async () => {
176
+ try {
177
+ const samplesPerChannel = this.#outFrameSize;
178
+ const bstream = new AudioByteStream(
179
+ this.#sampleRate,
180
+ this.#numChannels,
181
+ samplesPerChannel,
182
+ );
183
+
184
+ for await (const frame of audioStream) {
185
+ if (cancelledCapture || cancelled) {
186
+ break;
187
+ }
188
+ if (firstFrame) {
189
+ handle.transcriptionFwd.start();
190
+ firstFrame = false;
191
+ }
192
+
193
+ handle.transcriptionFwd.pushAudio(frame);
194
+
195
+ for (const f of bstream.write(frame.data.buffer)) {
196
+ handle.pushedDuration += f.samplesPerChannel / f.sampleRate;
197
+ await this.#audioSource.captureFrame(f);
198
+ }
199
+ }
200
+
201
+ if (!cancelledCapture && !cancelled) {
202
+ for (const f of bstream.flush()) {
203
+ handle.pushedDuration += f.samplesPerChannel / f.sampleRate;
204
+ await this.#audioSource.captureFrame(f);
205
+ }
206
+
207
+ handle.transcriptionFwd.markAudioComplete();
208
+
209
+ await this.#audioSource.waitForPlayout();
210
+ }
211
+
212
+ resolveCapture();
213
+ } catch (error) {
214
+ rejectCapture(error);
215
+ }
216
+ })();
217
+ });
218
+
219
+ const readTextTask = readText();
220
+ const captureTask = capture();
221
+
222
+ try {
223
+ await Promise.race([captureTask, handle.intFut.await]);
224
+ } finally {
225
+ if (!captureTask.isCancelled) {
226
+ await gracefullyCancel(captureTask);
227
+ }
228
+
229
+ handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
230
+
231
+ if (handle.interrupted || captureTask.error) {
232
+ this.#audioSource.clearQueue(); // make sure to remove any queued frames
233
+ }
234
+
235
+ if (!readTextTask.isCancelled) {
236
+ await gracefullyCancel(readTextTask);
237
+ }
238
+
239
+ if (!firstFrame && !handle.interrupted) {
240
+ handle.transcriptionFwd.markTextComplete();
241
+ }
242
+
243
+ handle.doneFut.resolve();
244
+ await handle.transcriptionFwd.close(handle.interrupted);
245
+ }
246
+
247
+ resolve();
248
+ } catch (error) {
249
+ reject(error);
250
+ }
251
+ })();
252
+ });
253
+ }
254
+ }
package/src/multimodal/index.ts ADDED
@@ -0,0 +1,5 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ export * from './multimodal_agent.js';
5
+ export * from './agent_playout.js';