@livekit/agents 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +26 -0
- package/dist/audio.d.ts +1 -4
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +28 -11
- package/dist/audio.js.map +1 -1
- package/dist/cli.d.ts +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +36 -13
- package/dist/cli.js.map +1 -1
- package/dist/generator.d.ts +5 -0
- package/dist/generator.d.ts.map +1 -1
- package/dist/generator.js +11 -0
- package/dist/generator.js.map +1 -1
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js +5 -0
- package/dist/http_server.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/ipc/job_main.js +9 -1
- package/dist/ipc/job_main.js.map +1 -1
- package/dist/ipc/proc_pool.d.ts.map +1 -1
- package/dist/ipc/proc_pool.js +1 -0
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/job.d.ts +1 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +17 -1
- package/dist/job.js.map +1 -1
- package/dist/multimodal/agent_playout.d.ts +34 -0
- package/dist/multimodal/agent_playout.d.ts.map +1 -0
- package/dist/multimodal/agent_playout.js +221 -0
- package/dist/multimodal/agent_playout.js.map +1 -0
- package/dist/multimodal/index.d.ts +3 -0
- package/dist/multimodal/index.d.ts.map +1 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/multimodal/index.js.map +1 -0
- package/dist/multimodal/multimodal_agent.d.ts +47 -0
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -0
- package/dist/multimodal/multimodal_agent.js +331 -0
- package/dist/multimodal/multimodal_agent.js.map +1 -0
- package/dist/transcription.d.ts +22 -0
- package/dist/transcription.d.ts.map +1 -0
- package/dist/transcription.js +111 -0
- package/dist/transcription.js.map +1 -0
- package/dist/utils.d.ts +27 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +107 -9
- package/dist/utils.js.map +1 -1
- package/dist/worker.d.ts +3 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +44 -8
- package/dist/worker.js.map +1 -1
- package/package.json +6 -4
- package/src/audio.ts +19 -19
- package/src/cli.ts +37 -13
- package/src/generator.ts +14 -0
- package/src/http_server.ts +5 -0
- package/src/index.ts +3 -1
- package/src/ipc/job_main.ts +9 -2
- package/src/ipc/proc_pool.ts +1 -0
- package/src/job.ts +21 -1
- package/src/multimodal/agent_playout.ts +254 -0
- package/src/multimodal/index.ts +5 -0
- package/src/multimodal/multimodal_agent.ts +428 -0
- package/src/transcription.ts +128 -0
- package/src/utils.ts +138 -6
- package/src/worker.ts +54 -10
- package/tsconfig.json +1 -1
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "LiveKit Node
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "LiveKit Agents - Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
|
-
"author": "
|
|
7
|
+
"author": "LiveKit",
|
|
8
8
|
"type": "module",
|
|
9
9
|
"devDependencies": {
|
|
10
10
|
"@microsoft/api-extractor": "^7.35.0",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
"dependencies": {
|
|
16
16
|
"@livekit/protocol": "^1.21.0",
|
|
17
|
-
"@livekit/rtc-node": "^0.
|
|
17
|
+
"@livekit/rtc-node": "^0.9.0",
|
|
18
18
|
"commander": "^12.0.0",
|
|
19
19
|
"livekit-server-sdk": "^2.6.1",
|
|
20
20
|
"pino": "^8.19.0",
|
|
@@ -24,6 +24,8 @@
|
|
|
24
24
|
},
|
|
25
25
|
"scripts": {
|
|
26
26
|
"build": "tsc",
|
|
27
|
+
"clean": "rm -rf dist",
|
|
28
|
+
"clean:build": "pnpm clean && pnpm build",
|
|
27
29
|
"lint": "eslint -f unix \"src/**/*.ts\"",
|
|
28
30
|
"api:check": "api-extractor run --typescript-compiler-folder ../node_modules/typescript",
|
|
29
31
|
"api:update": "api-extractor run --local --typescript-compiler-folder ../node_modules/typescript --verbose"
|
package/src/audio.ts
CHANGED
|
@@ -6,36 +6,36 @@ import { log } from './log.js';
|
|
|
6
6
|
|
|
7
7
|
/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. */
|
|
8
8
|
export class AudioByteStream {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
#sampleRate: number;
|
|
10
|
+
#numChannels: number;
|
|
11
|
+
#bytesPerFrame: number;
|
|
12
|
+
#buf: Int8Array;
|
|
13
13
|
|
|
14
14
|
constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {
|
|
15
|
-
this
|
|
16
|
-
this
|
|
15
|
+
this.#sampleRate = sampleRate;
|
|
16
|
+
this.#numChannels = numChannels;
|
|
17
17
|
|
|
18
18
|
if (samplesPerChannel === null) {
|
|
19
19
|
samplesPerChannel = Math.floor(sampleRate / 50); // 20ms by default
|
|
20
20
|
}
|
|
21
21
|
|
|
22
|
-
this
|
|
23
|
-
this
|
|
22
|
+
this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)
|
|
23
|
+
this.#buf = new Int8Array();
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
write(data: ArrayBuffer): AudioFrame[] {
|
|
27
|
-
this
|
|
27
|
+
this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);
|
|
28
28
|
|
|
29
29
|
const frames: AudioFrame[] = [];
|
|
30
|
-
while (this
|
|
31
|
-
const frameData = this
|
|
32
|
-
this
|
|
30
|
+
while (this.#buf.length >= this.#bytesPerFrame) {
|
|
31
|
+
const frameData = this.#buf.slice(0, this.#bytesPerFrame);
|
|
32
|
+
this.#buf = this.#buf.slice(this.#bytesPerFrame);
|
|
33
33
|
|
|
34
34
|
frames.push(
|
|
35
35
|
new AudioFrame(
|
|
36
36
|
new Int16Array(frameData.buffer),
|
|
37
|
-
this
|
|
38
|
-
this
|
|
37
|
+
this.#sampleRate,
|
|
38
|
+
this.#numChannels,
|
|
39
39
|
frameData.length / 2,
|
|
40
40
|
),
|
|
41
41
|
);
|
|
@@ -45,17 +45,17 @@ export class AudioByteStream {
|
|
|
45
45
|
}
|
|
46
46
|
|
|
47
47
|
flush(): AudioFrame[] {
|
|
48
|
-
if (this
|
|
48
|
+
if (this.#buf.length % (2 * this.#numChannels) !== 0) {
|
|
49
49
|
log().warn('AudioByteStream: incomplete frame during flush, dropping');
|
|
50
50
|
return [];
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
return [
|
|
54
54
|
new AudioFrame(
|
|
55
|
-
new Int16Array(this
|
|
56
|
-
this
|
|
57
|
-
this
|
|
58
|
-
this
|
|
55
|
+
new Int16Array(this.#buf.buffer),
|
|
56
|
+
this.#sampleRate,
|
|
57
|
+
this.#numChannels,
|
|
58
|
+
this.#buf.length / 2,
|
|
59
59
|
),
|
|
60
60
|
];
|
|
61
61
|
}
|
package/src/cli.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { Command, Option } from 'commander';
|
|
|
5
5
|
import type { EventEmitter } from 'events';
|
|
6
6
|
import { initializeLogger, log } from './log.js';
|
|
7
7
|
import { version } from './version.js';
|
|
8
|
-
import { Worker,
|
|
8
|
+
import { Worker, WorkerOptions } from './worker.js';
|
|
9
9
|
|
|
10
10
|
type CliArgs = {
|
|
11
11
|
opts: WorkerOptions;
|
|
@@ -18,7 +18,10 @@ type CliArgs = {
|
|
|
18
18
|
|
|
19
19
|
const runWorker = async (args: CliArgs) => {
|
|
20
20
|
initializeLogger({ pretty: !args.production, level: args.opts.logLevel });
|
|
21
|
-
|
|
21
|
+
|
|
22
|
+
// though `production` is defined in WorkerOptions, it will always be overridden by CLI.
|
|
23
|
+
const { production: _, ...opts } = args.opts; // eslint-disable-line @typescript-eslint/no-unused-vars
|
|
24
|
+
const worker = new Worker(new WorkerOptions({ production: args.production, ...opts }));
|
|
22
25
|
|
|
23
26
|
if (args.room) {
|
|
24
27
|
worker.event.once('worker_registered', () => {
|
|
@@ -72,20 +75,29 @@ export const runApp = (opts: WorkerOptions) => {
|
|
|
72
75
|
.env('LOG_LEVEL'),
|
|
73
76
|
)
|
|
74
77
|
.addOption(
|
|
75
|
-
new Option('--url <string>', 'LiveKit server or Cloud project websocket URL')
|
|
76
|
-
|
|
77
|
-
|
|
78
|
+
new Option('--url <string>', 'LiveKit server or Cloud project websocket URL').env(
|
|
79
|
+
'LIVEKIT_URL',
|
|
80
|
+
),
|
|
78
81
|
)
|
|
79
82
|
.addOption(
|
|
80
|
-
new Option('--api-key <string>', "LiveKit server or Cloud project's API key")
|
|
81
|
-
|
|
82
|
-
|
|
83
|
+
new Option('--api-key <string>', "LiveKit server or Cloud project's API key").env(
|
|
84
|
+
'LIVEKIT_API_KEY',
|
|
85
|
+
),
|
|
83
86
|
)
|
|
84
87
|
.addOption(
|
|
85
|
-
new Option('--api-secret <string>', "LiveKit server or Cloud project's API secret")
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
)
|
|
88
|
+
new Option('--api-secret <string>', "LiveKit server or Cloud project's API secret").env(
|
|
89
|
+
'LIVEKIT_API_SECRET',
|
|
90
|
+
),
|
|
91
|
+
)
|
|
92
|
+
.action(() => {
|
|
93
|
+
if (
|
|
94
|
+
// do not run CLI if origin file is agents/ipc/job_main.js
|
|
95
|
+
process.argv[1] !== new URL('ipc/job_main.js', import.meta.url).pathname ||
|
|
96
|
+
process.argv.length < 3
|
|
97
|
+
) {
|
|
98
|
+
program.help();
|
|
99
|
+
}
|
|
100
|
+
});
|
|
89
101
|
|
|
90
102
|
program
|
|
91
103
|
.command('start')
|
|
@@ -106,6 +118,12 @@ export const runApp = (opts: WorkerOptions) => {
|
|
|
106
118
|
program
|
|
107
119
|
.command('dev')
|
|
108
120
|
.description('Start the worker in development mode')
|
|
121
|
+
.addOption(
|
|
122
|
+
new Option('--log-level <level>', 'Set the logging level')
|
|
123
|
+
.choices(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
|
|
124
|
+
.default('debug')
|
|
125
|
+
.env('LOG_LEVEL'),
|
|
126
|
+
)
|
|
109
127
|
.action(() => {
|
|
110
128
|
const options = program.optsWithGlobals();
|
|
111
129
|
opts.wsURL = options.url || opts.wsURL;
|
|
@@ -123,7 +141,13 @@ export const runApp = (opts: WorkerOptions) => {
|
|
|
123
141
|
.command('connect')
|
|
124
142
|
.description('Connect to a specific room')
|
|
125
143
|
.requiredOption('--room <string>', 'Room name to connect to')
|
|
126
|
-
.option('--participant-identity <string>', '
|
|
144
|
+
.option('--participant-identity <string>', 'Identity of user to listen to')
|
|
145
|
+
.addOption(
|
|
146
|
+
new Option('--log-level <level>', 'Set the logging level')
|
|
147
|
+
.choices(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
|
|
148
|
+
.default('debug')
|
|
149
|
+
.env('LOG_LEVEL'),
|
|
150
|
+
)
|
|
127
151
|
.action((...[, command]) => {
|
|
128
152
|
const options = command.optsWithGlobals();
|
|
129
153
|
opts.wsURL = options.url || opts.wsURL;
|
package/src/generator.ts
CHANGED
|
@@ -9,6 +9,20 @@ export interface Agent {
|
|
|
9
9
|
prewarm?: (proc: JobProcess) => unknown;
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
+
/** Helper to check if an object is an agent before running it.
|
|
13
|
+
*
|
|
14
|
+
* @internal
|
|
15
|
+
*/
|
|
16
|
+
export function isAgent(obj: unknown): obj is Agent {
|
|
17
|
+
return (
|
|
18
|
+
typeof obj === 'object' &&
|
|
19
|
+
obj !== null &&
|
|
20
|
+
'entry' in obj &&
|
|
21
|
+
typeof (obj as Agent).entry === 'function' &&
|
|
22
|
+
(('prewarm' in obj && typeof (obj as Agent).prewarm === 'function') || !('prewarm' in obj))
|
|
23
|
+
);
|
|
24
|
+
}
|
|
25
|
+
|
|
12
26
|
/**
|
|
13
27
|
* Helper to define an agent according to the required interface.
|
|
14
28
|
* @example A basic agent with entry and prewarm functions
|
package/src/http_server.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { type IncomingMessage, type Server, type ServerResponse, createServer } from 'http';
|
|
5
|
+
import { log } from './log.js';
|
|
5
6
|
|
|
6
7
|
const healthCheck = async (res: ServerResponse) => {
|
|
7
8
|
res.writeHead(200);
|
|
@@ -31,6 +32,10 @@ export class HTTPServer {
|
|
|
31
32
|
return new Promise((resolve, reject) => {
|
|
32
33
|
this.app.listen(this.port, this.host, (err?: Error) => {
|
|
33
34
|
if (err) reject(err);
|
|
35
|
+
const address = this.app.address();
|
|
36
|
+
if (typeof address! !== 'string') {
|
|
37
|
+
log().info(`Server is listening on port ${address!.port}`);
|
|
38
|
+
}
|
|
34
39
|
resolve();
|
|
35
40
|
});
|
|
36
41
|
});
|
package/src/index.ts
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import * as cli from './cli.js';
|
|
13
13
|
import * as llm from './llm/index.js';
|
|
14
|
+
import * as multimodal from './multimodal/index.js';
|
|
14
15
|
import * as stt from './stt/index.js';
|
|
15
16
|
import * as tts from './tts/index.js';
|
|
16
17
|
|
|
@@ -24,5 +25,6 @@ export * from './log.js';
|
|
|
24
25
|
export * from './generator.js';
|
|
25
26
|
export * from './tokenize.js';
|
|
26
27
|
export * from './audio.js';
|
|
28
|
+
export * from './transcription.js';
|
|
27
29
|
|
|
28
|
-
export { cli, stt, tts, llm };
|
|
30
|
+
export { cli, stt, tts, llm, multimodal };
|
package/src/ipc/job_main.ts
CHANGED
|
@@ -7,7 +7,7 @@ import { fork } from 'child_process';
|
|
|
7
7
|
import { EventEmitter, once } from 'events';
|
|
8
8
|
import type { Logger } from 'pino';
|
|
9
9
|
import { fileURLToPath } from 'url';
|
|
10
|
-
import type
|
|
10
|
+
import { type Agent, isAgent } from '../generator.js';
|
|
11
11
|
import type { RunningJobInfo } from '../job.js';
|
|
12
12
|
import { JobContext } from '../job.js';
|
|
13
13
|
import { JobProcess } from '../job.js';
|
|
@@ -93,7 +93,14 @@ if (process.send) {
|
|
|
93
93
|
// [0] `node'
|
|
94
94
|
// [1] import.meta.filename
|
|
95
95
|
// [2] import.meta.filename of function containing entry file
|
|
96
|
-
const
|
|
96
|
+
const moduleFile = process.argv[2];
|
|
97
|
+
const agent: Agent = await import(moduleFile).then((module) => {
|
|
98
|
+
const agent = module.default;
|
|
99
|
+
if (agent === undefined || !isAgent(agent)) {
|
|
100
|
+
throw new Error(`Unable to load agent: Missing or invalid default export in ${moduleFile}`);
|
|
101
|
+
}
|
|
102
|
+
return agent;
|
|
103
|
+
});
|
|
97
104
|
if (!agent.prewarm) {
|
|
98
105
|
agent.prewarm = defaultInitializeProcessFunc;
|
|
99
106
|
}
|
package/src/ipc/proc_pool.ts
CHANGED
package/src/job.ts
CHANGED
|
@@ -9,7 +9,7 @@ import type {
|
|
|
9
9
|
Room,
|
|
10
10
|
RtcConfiguration,
|
|
11
11
|
} from '@livekit/rtc-node';
|
|
12
|
-
import { RoomEvent, TrackKind } from '@livekit/rtc-node';
|
|
12
|
+
import { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';
|
|
13
13
|
import type { Logger } from 'pino';
|
|
14
14
|
import { log } from './log.js';
|
|
15
15
|
|
|
@@ -100,6 +100,26 @@ export class JobContext {
|
|
|
100
100
|
this.shutdownCallbacks.push(callback);
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
+
async waitForParticipant(identity?: string): Promise<RemoteParticipant> {
|
|
104
|
+
if (!this.#room.isConnected) {
|
|
105
|
+
throw new Error('room is not connected');
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
console.log(this.#room.remoteParticipants.values());
|
|
109
|
+
|
|
110
|
+
for (const p of this.#room.remoteParticipants.values()) {
|
|
111
|
+
if ((!identity || p.identity === identity) && p.info.kind != ParticipantKind.AGENT) {
|
|
112
|
+
return p;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return new Promise((resolve) => {
|
|
117
|
+
this.#room.once(RoomEvent.ParticipantConnected, () => {
|
|
118
|
+
resolve(this.#room.remoteParticipants.values().next().value);
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
|
|
103
123
|
/**
|
|
104
124
|
* Connects the agent to the room.
|
|
105
125
|
*
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
+
import { type AudioSource } from '@livekit/rtc-node';
|
|
6
|
+
import { EventEmitter } from 'events';
|
|
7
|
+
import { AudioByteStream } from '../audio.js';
|
|
8
|
+
import type { TranscriptionForwarder } from '../transcription.js';
|
|
9
|
+
import { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
|
|
10
|
+
|
|
11
|
+
export const proto = {};
|
|
12
|
+
|
|
13
|
+
export class PlayoutHandle extends EventEmitter {
|
|
14
|
+
#audioSource: AudioSource;
|
|
15
|
+
#sampleRate: number;
|
|
16
|
+
#itemId: string;
|
|
17
|
+
#contentIndex: number;
|
|
18
|
+
/** @internal */
|
|
19
|
+
transcriptionFwd: TranscriptionForwarder;
|
|
20
|
+
/** @internal */
|
|
21
|
+
doneFut: Future;
|
|
22
|
+
/** @internal */
|
|
23
|
+
intFut: Future;
|
|
24
|
+
/** @internal */
|
|
25
|
+
#interrupted: boolean;
|
|
26
|
+
/** @internal */
|
|
27
|
+
pushedDuration: number;
|
|
28
|
+
/** @internal */
|
|
29
|
+
totalPlayedTime: number | undefined; // Set when playout is done
|
|
30
|
+
|
|
31
|
+
constructor(
|
|
32
|
+
audioSource: AudioSource,
|
|
33
|
+
sampleRate: number,
|
|
34
|
+
itemId: string,
|
|
35
|
+
contentIndex: number,
|
|
36
|
+
transcriptionFwd: TranscriptionForwarder,
|
|
37
|
+
) {
|
|
38
|
+
super();
|
|
39
|
+
this.#audioSource = audioSource;
|
|
40
|
+
this.#sampleRate = sampleRate;
|
|
41
|
+
this.#itemId = itemId;
|
|
42
|
+
this.#contentIndex = contentIndex;
|
|
43
|
+
this.transcriptionFwd = transcriptionFwd;
|
|
44
|
+
this.doneFut = new Future();
|
|
45
|
+
this.intFut = new Future();
|
|
46
|
+
this.#interrupted = false;
|
|
47
|
+
this.pushedDuration = 0;
|
|
48
|
+
this.totalPlayedTime = undefined;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
get itemId(): string {
|
|
52
|
+
return this.#itemId;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
get audioSamples(): number {
|
|
56
|
+
if (this.totalPlayedTime !== undefined) {
|
|
57
|
+
return Math.floor(this.totalPlayedTime * this.#sampleRate);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
return Math.floor(this.pushedDuration - this.#audioSource.queuedDuration * this.#sampleRate);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
get textChars(): number {
|
|
64
|
+
return this.transcriptionFwd.currentCharacterIndex;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
get contentIndex(): number {
|
|
68
|
+
return this.#contentIndex;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
get interrupted(): boolean {
|
|
72
|
+
return this.#interrupted;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
get done(): boolean {
|
|
76
|
+
return this.doneFut.done || this.#interrupted;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
interrupt() {
|
|
80
|
+
if (this.doneFut.done) return;
|
|
81
|
+
this.intFut.resolve();
|
|
82
|
+
this.#interrupted = true;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
export class AgentPlayout {
|
|
87
|
+
#audioSource: AudioSource;
|
|
88
|
+
#playoutTask: CancellablePromise<void> | null;
|
|
89
|
+
#sampleRate: number;
|
|
90
|
+
#numChannels: number;
|
|
91
|
+
#inFrameSize: number;
|
|
92
|
+
#outFrameSize: number;
|
|
93
|
+
constructor(
|
|
94
|
+
audioSource: AudioSource,
|
|
95
|
+
sampleRate: number,
|
|
96
|
+
numChannels: number,
|
|
97
|
+
inFrameSize: number,
|
|
98
|
+
outFrameSize: number,
|
|
99
|
+
) {
|
|
100
|
+
this.#audioSource = audioSource;
|
|
101
|
+
this.#playoutTask = null;
|
|
102
|
+
this.#sampleRate = sampleRate;
|
|
103
|
+
this.#numChannels = numChannels;
|
|
104
|
+
this.#inFrameSize = inFrameSize;
|
|
105
|
+
this.#outFrameSize = outFrameSize;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
play(
|
|
109
|
+
itemId: string,
|
|
110
|
+
contentIndex: number,
|
|
111
|
+
transcriptionFwd: TranscriptionForwarder,
|
|
112
|
+
textStream: AsyncIterableQueue<string>,
|
|
113
|
+
audioStream: AsyncIterableQueue<AudioFrame>,
|
|
114
|
+
): PlayoutHandle {
|
|
115
|
+
const handle = new PlayoutHandle(
|
|
116
|
+
this.#audioSource,
|
|
117
|
+
this.#sampleRate,
|
|
118
|
+
itemId,
|
|
119
|
+
contentIndex,
|
|
120
|
+
transcriptionFwd,
|
|
121
|
+
);
|
|
122
|
+
this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);
|
|
123
|
+
return handle;
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
#makePlayoutTask(
|
|
127
|
+
oldTask: CancellablePromise<void> | null,
|
|
128
|
+
handle: PlayoutHandle,
|
|
129
|
+
textStream: AsyncIterableQueue<string>,
|
|
130
|
+
audioStream: AsyncIterableQueue<AudioFrame>,
|
|
131
|
+
): CancellablePromise<void> {
|
|
132
|
+
return new CancellablePromise<void>((resolve, reject, onCancel) => {
|
|
133
|
+
let cancelled = false;
|
|
134
|
+
onCancel(() => {
|
|
135
|
+
cancelled = true;
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
(async () => {
|
|
139
|
+
try {
|
|
140
|
+
if (oldTask) {
|
|
141
|
+
await gracefullyCancel(oldTask);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
let firstFrame = true;
|
|
145
|
+
|
|
146
|
+
const readText = () =>
|
|
147
|
+
new CancellablePromise<void>((resolveText, rejectText, onCancelText) => {
|
|
148
|
+
let cancelledText = false;
|
|
149
|
+
onCancelText(() => {
|
|
150
|
+
cancelledText = true;
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
(async () => {
|
|
154
|
+
try {
|
|
155
|
+
for await (const text of textStream) {
|
|
156
|
+
if (cancelledText || cancelled) {
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
handle.transcriptionFwd.pushText(text);
|
|
160
|
+
}
|
|
161
|
+
resolveText();
|
|
162
|
+
} catch (error) {
|
|
163
|
+
rejectText(error);
|
|
164
|
+
}
|
|
165
|
+
})();
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
const capture = () =>
|
|
169
|
+
new CancellablePromise<void>((resolveCapture, rejectCapture, onCancelCapture) => {
|
|
170
|
+
let cancelledCapture = false;
|
|
171
|
+
onCancelCapture(() => {
|
|
172
|
+
cancelledCapture = true;
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
(async () => {
|
|
176
|
+
try {
|
|
177
|
+
const samplesPerChannel = this.#outFrameSize;
|
|
178
|
+
const bstream = new AudioByteStream(
|
|
179
|
+
this.#sampleRate,
|
|
180
|
+
this.#numChannels,
|
|
181
|
+
samplesPerChannel,
|
|
182
|
+
);
|
|
183
|
+
|
|
184
|
+
for await (const frame of audioStream) {
|
|
185
|
+
if (cancelledCapture || cancelled) {
|
|
186
|
+
break;
|
|
187
|
+
}
|
|
188
|
+
if (firstFrame) {
|
|
189
|
+
handle.transcriptionFwd.start();
|
|
190
|
+
firstFrame = false;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
handle.transcriptionFwd.pushAudio(frame);
|
|
194
|
+
|
|
195
|
+
for (const f of bstream.write(frame.data.buffer)) {
|
|
196
|
+
handle.pushedDuration += f.samplesPerChannel / f.sampleRate;
|
|
197
|
+
await this.#audioSource.captureFrame(f);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
if (!cancelledCapture && !cancelled) {
|
|
202
|
+
for (const f of bstream.flush()) {
|
|
203
|
+
handle.pushedDuration += f.samplesPerChannel / f.sampleRate;
|
|
204
|
+
await this.#audioSource.captureFrame(f);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
handle.transcriptionFwd.markAudioComplete();
|
|
208
|
+
|
|
209
|
+
await this.#audioSource.waitForPlayout();
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
resolveCapture();
|
|
213
|
+
} catch (error) {
|
|
214
|
+
rejectCapture(error);
|
|
215
|
+
}
|
|
216
|
+
})();
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
const readTextTask = readText();
|
|
220
|
+
const captureTask = capture();
|
|
221
|
+
|
|
222
|
+
try {
|
|
223
|
+
await Promise.race([captureTask, handle.intFut.await]);
|
|
224
|
+
} finally {
|
|
225
|
+
if (!captureTask.isCancelled) {
|
|
226
|
+
await gracefullyCancel(captureTask);
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
230
|
+
|
|
231
|
+
if (handle.interrupted || captureTask.error) {
|
|
232
|
+
this.#audioSource.clearQueue(); // make sure to remove any queued frames
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
if (!readTextTask.isCancelled) {
|
|
236
|
+
await gracefullyCancel(readTextTask);
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
if (!firstFrame && !handle.interrupted) {
|
|
240
|
+
handle.transcriptionFwd.markTextComplete();
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
handle.doneFut.resolve();
|
|
244
|
+
await handle.transcriptionFwd.close(handle.interrupted);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
resolve();
|
|
248
|
+
} catch (error) {
|
|
249
|
+
reject(error);
|
|
250
|
+
}
|
|
251
|
+
})();
|
|
252
|
+
});
|
|
253
|
+
}
|
|
254
|
+
}
|