@livekit/agents 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +40 -0
- package/dist/audio.d.ts +1 -4
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +30 -12
- package/dist/audio.js.map +1 -1
- package/dist/cli.d.ts +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +41 -17
- package/dist/cli.js.map +1 -1
- package/dist/generator.d.ts +5 -0
- package/dist/generator.d.ts.map +1 -1
- package/dist/generator.js +11 -0
- package/dist/generator.js.map +1 -1
- package/dist/http_server.d.ts +1 -0
- package/dist/http_server.d.ts.map +1 -1
- package/dist/http_server.js +13 -0
- package/dist/http_server.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/ipc/job_main.js +9 -1
- package/dist/ipc/job_main.js.map +1 -1
- package/dist/ipc/proc_pool.d.ts.map +1 -1
- package/dist/ipc/proc_pool.js +1 -0
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/job.d.ts +1 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +30 -1
- package/dist/job.js.map +1 -1
- package/dist/multimodal/agent_playout.d.ts +34 -0
- package/dist/multimodal/agent_playout.d.ts.map +1 -0
- package/dist/multimodal/agent_playout.js +221 -0
- package/dist/multimodal/agent_playout.js.map +1 -0
- package/dist/multimodal/index.d.ts +3 -0
- package/dist/multimodal/index.d.ts.map +1 -0
- package/dist/multimodal/index.js +6 -0
- package/dist/multimodal/index.js.map +1 -0
- package/dist/multimodal/multimodal_agent.d.ts +47 -0
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -0
- package/dist/multimodal/multimodal_agent.js +329 -0
- package/dist/multimodal/multimodal_agent.js.map +1 -0
- package/dist/transcription.d.ts +22 -0
- package/dist/transcription.d.ts.map +1 -0
- package/dist/transcription.js +112 -0
- package/dist/transcription.js.map +1 -0
- package/dist/utils.d.ts +29 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +117 -15
- package/dist/utils.js.map +1 -1
- package/dist/worker.d.ts +3 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +49 -9
- package/dist/worker.js.map +1 -1
- package/package.json +6 -4
- package/src/audio.ts +21 -20
- package/src/cli.ts +42 -17
- package/src/generator.ts +14 -0
- package/src/http_server.ts +6 -0
- package/src/index.ts +3 -1
- package/src/ipc/job_main.ts +9 -2
- package/src/ipc/proc_pool.ts +1 -0
- package/src/job.ts +37 -1
- package/src/multimodal/agent_playout.ts +254 -0
- package/src/multimodal/index.ts +5 -0
- package/src/multimodal/multimodal_agent.ts +426 -0
- package/src/transcription.ts +129 -0
- package/src/utils.ts +151 -12
- package/src/worker.ts +60 -14
- package/tsconfig.json +1 -1
package/src/cli.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { Command, Option } from 'commander';
|
|
|
5
5
|
import type { EventEmitter } from 'events';
|
|
6
6
|
import { initializeLogger, log } from './log.js';
|
|
7
7
|
import { version } from './version.js';
|
|
8
|
-
import { Worker,
|
|
8
|
+
import { Worker, WorkerOptions } from './worker.js';
|
|
9
9
|
|
|
10
10
|
type CliArgs = {
|
|
11
11
|
opts: WorkerOptions;
|
|
@@ -18,11 +18,15 @@ type CliArgs = {
|
|
|
18
18
|
|
|
19
19
|
const runWorker = async (args: CliArgs) => {
|
|
20
20
|
initializeLogger({ pretty: !args.production, level: args.opts.logLevel });
|
|
21
|
-
const
|
|
21
|
+
const logger = log();
|
|
22
|
+
|
|
23
|
+
// though `production` is defined in WorkerOptions, it will always be overridden by CLI.
|
|
24
|
+
const { production: _, ...opts } = args.opts; // eslint-disable-line @typescript-eslint/no-unused-vars
|
|
25
|
+
const worker = new Worker(new WorkerOptions({ production: args.production, ...opts }));
|
|
22
26
|
|
|
23
27
|
if (args.room) {
|
|
24
28
|
worker.event.once('worker_registered', () => {
|
|
25
|
-
|
|
29
|
+
logger.info(`connecting to room ${args.room}`);
|
|
26
30
|
worker.simulateJob(args.room!, args.participantIdentity);
|
|
27
31
|
});
|
|
28
32
|
}
|
|
@@ -30,21 +34,21 @@ const runWorker = async (args: CliArgs) => {
|
|
|
30
34
|
process.once('SIGINT', async () => {
|
|
31
35
|
// allow C-c C-c for force interrupt
|
|
32
36
|
process.once('SIGINT', () => {
|
|
33
|
-
|
|
37
|
+
logger.info('worker closed forcefully');
|
|
34
38
|
process.exit(130); // SIGINT exit code
|
|
35
39
|
});
|
|
36
40
|
if (args.production) {
|
|
37
41
|
await worker.drain();
|
|
38
42
|
}
|
|
39
43
|
await worker.close();
|
|
40
|
-
|
|
44
|
+
logger.info('worker closed');
|
|
41
45
|
process.exit(130); // SIGINT exit code
|
|
42
46
|
});
|
|
43
47
|
|
|
44
48
|
try {
|
|
45
49
|
await worker.run();
|
|
46
50
|
} catch {
|
|
47
|
-
|
|
51
|
+
logger.fatal('worker failed');
|
|
48
52
|
process.exit(1);
|
|
49
53
|
}
|
|
50
54
|
};
|
|
@@ -72,20 +76,29 @@ export const runApp = (opts: WorkerOptions) => {
|
|
|
72
76
|
.env('LOG_LEVEL'),
|
|
73
77
|
)
|
|
74
78
|
.addOption(
|
|
75
|
-
new Option('--url <string>', 'LiveKit server or Cloud project websocket URL')
|
|
76
|
-
|
|
77
|
-
|
|
79
|
+
new Option('--url <string>', 'LiveKit server or Cloud project websocket URL').env(
|
|
80
|
+
'LIVEKIT_URL',
|
|
81
|
+
),
|
|
78
82
|
)
|
|
79
83
|
.addOption(
|
|
80
|
-
new Option('--api-key <string>', "LiveKit server or Cloud project's API key")
|
|
81
|
-
|
|
82
|
-
|
|
84
|
+
new Option('--api-key <string>', "LiveKit server or Cloud project's API key").env(
|
|
85
|
+
'LIVEKIT_API_KEY',
|
|
86
|
+
),
|
|
83
87
|
)
|
|
84
88
|
.addOption(
|
|
85
|
-
new Option('--api-secret <string>', "LiveKit server or Cloud project's API secret")
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
)
|
|
89
|
+
new Option('--api-secret <string>', "LiveKit server or Cloud project's API secret").env(
|
|
90
|
+
'LIVEKIT_API_SECRET',
|
|
91
|
+
),
|
|
92
|
+
)
|
|
93
|
+
.action(() => {
|
|
94
|
+
if (
|
|
95
|
+
// do not run CLI if origin file is agents/ipc/job_main.js
|
|
96
|
+
process.argv[1] !== new URL('ipc/job_main.js', import.meta.url).pathname ||
|
|
97
|
+
process.argv.length < 3
|
|
98
|
+
) {
|
|
99
|
+
program.help();
|
|
100
|
+
}
|
|
101
|
+
});
|
|
89
102
|
|
|
90
103
|
program
|
|
91
104
|
.command('start')
|
|
@@ -106,6 +119,12 @@ export const runApp = (opts: WorkerOptions) => {
|
|
|
106
119
|
program
|
|
107
120
|
.command('dev')
|
|
108
121
|
.description('Start the worker in development mode')
|
|
122
|
+
.addOption(
|
|
123
|
+
new Option('--log-level <level>', 'Set the logging level')
|
|
124
|
+
.choices(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
|
|
125
|
+
.default('debug')
|
|
126
|
+
.env('LOG_LEVEL'),
|
|
127
|
+
)
|
|
109
128
|
.action(() => {
|
|
110
129
|
const options = program.optsWithGlobals();
|
|
111
130
|
opts.wsURL = options.url || opts.wsURL;
|
|
@@ -123,7 +142,13 @@ export const runApp = (opts: WorkerOptions) => {
|
|
|
123
142
|
.command('connect')
|
|
124
143
|
.description('Connect to a specific room')
|
|
125
144
|
.requiredOption('--room <string>', 'Room name to connect to')
|
|
126
|
-
.option('--participant-identity <string>', '
|
|
145
|
+
.option('--participant-identity <string>', 'Identity of user to listen to')
|
|
146
|
+
.addOption(
|
|
147
|
+
new Option('--log-level <level>', 'Set the logging level')
|
|
148
|
+
.choices(['trace', 'debug', 'info', 'warn', 'error', 'fatal'])
|
|
149
|
+
.default('debug')
|
|
150
|
+
.env('LOG_LEVEL'),
|
|
151
|
+
)
|
|
127
152
|
.action((...[, command]) => {
|
|
128
153
|
const options = command.optsWithGlobals();
|
|
129
154
|
opts.wsURL = options.url || opts.wsURL;
|
package/src/generator.ts
CHANGED
|
@@ -9,6 +9,20 @@ export interface Agent {
|
|
|
9
9
|
prewarm?: (proc: JobProcess) => unknown;
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
+
/**
 * Helper to check if an object is an agent before running it.
 *
 * A value qualifies when it is a non-null object whose `entry` is a function and whose
 * `prewarm` is either missing, `undefined`, or a function. `prewarm` is optional on
 * {@link Agent}, so an explicitly-undefined `prewarm` must not disqualify the object.
 *
 * @param obj - value to inspect, e.g. the default export of a dynamically imported module
 * @returns `true` when `obj` can safely be treated as an {@link Agent}
 * @internal
 */
export function isAgent(obj: unknown): obj is Agent {
  if (typeof obj !== 'object' || obj === null) {
    return false;
  }

  const candidate = obj as { entry?: unknown; prewarm?: unknown };
  if (typeof candidate.entry !== 'function') {
    return false;
  }

  // absent and explicitly-undefined `prewarm` are both valid for an optional property
  return candidate.prewarm === undefined || typeof candidate.prewarm === 'function';
}
|
|
25
|
+
|
|
12
26
|
/**
|
|
13
27
|
* Helper to define an agent according to the required interface.
|
|
14
28
|
* @example A basic agent with entry and prewarm functions
|
package/src/http_server.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { type IncomingMessage, type Server, type ServerResponse, createServer } from 'http';
|
|
5
|
+
import { log } from './log.js';
|
|
5
6
|
|
|
6
7
|
const healthCheck = async (res: ServerResponse) => {
|
|
7
8
|
res.writeHead(200);
|
|
@@ -12,6 +13,7 @@ export class HTTPServer {
|
|
|
12
13
|
host: string;
|
|
13
14
|
port: number;
|
|
14
15
|
app: Server;
|
|
16
|
+
#logger = log();
|
|
15
17
|
|
|
16
18
|
constructor(host: string, port: number) {
|
|
17
19
|
this.host = host;
|
|
@@ -31,6 +33,10 @@ export class HTTPServer {
|
|
|
31
33
|
return new Promise((resolve, reject) => {
|
|
32
34
|
this.app.listen(this.port, this.host, (err?: Error) => {
|
|
33
35
|
if (err) reject(err);
|
|
36
|
+
const address = this.app.address();
|
|
37
|
+
if (typeof address! !== 'string') {
|
|
38
|
+
this.#logger.info(`Server is listening on port ${address!.port}`);
|
|
39
|
+
}
|
|
34
40
|
resolve();
|
|
35
41
|
});
|
|
36
42
|
});
|
package/src/index.ts
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import * as cli from './cli.js';
|
|
13
13
|
import * as llm from './llm/index.js';
|
|
14
|
+
import * as multimodal from './multimodal/index.js';
|
|
14
15
|
import * as stt from './stt/index.js';
|
|
15
16
|
import * as tts from './tts/index.js';
|
|
16
17
|
|
|
@@ -24,5 +25,6 @@ export * from './log.js';
|
|
|
24
25
|
export * from './generator.js';
|
|
25
26
|
export * from './tokenize.js';
|
|
26
27
|
export * from './audio.js';
|
|
28
|
+
export * from './transcription.js';
|
|
27
29
|
|
|
28
|
-
export { cli, stt, tts, llm };
|
|
30
|
+
export { cli, stt, tts, llm, multimodal };
|
package/src/ipc/job_main.ts
CHANGED
|
@@ -7,7 +7,7 @@ import { fork } from 'child_process';
|
|
|
7
7
|
import { EventEmitter, once } from 'events';
|
|
8
8
|
import type { Logger } from 'pino';
|
|
9
9
|
import { fileURLToPath } from 'url';
|
|
10
|
-
import type
|
|
10
|
+
import { type Agent, isAgent } from '../generator.js';
|
|
11
11
|
import type { RunningJobInfo } from '../job.js';
|
|
12
12
|
import { JobContext } from '../job.js';
|
|
13
13
|
import { JobProcess } from '../job.js';
|
|
@@ -93,7 +93,14 @@ if (process.send) {
|
|
|
93
93
|
// [0] `node'
|
|
94
94
|
// [1] import.meta.filename
|
|
95
95
|
// [2] import.meta.filename of function containing entry file
|
|
96
|
-
const
|
|
96
|
+
const moduleFile = process.argv[2];
|
|
97
|
+
const agent: Agent = await import(moduleFile).then((module) => {
|
|
98
|
+
const agent = module.default;
|
|
99
|
+
if (agent === undefined || !isAgent(agent)) {
|
|
100
|
+
throw new Error(`Unable to load agent: Missing or invalid default export in ${moduleFile}`);
|
|
101
|
+
}
|
|
102
|
+
return agent;
|
|
103
|
+
});
|
|
97
104
|
if (!agent.prewarm) {
|
|
98
105
|
agent.prewarm = defaultInitializeProcessFunc;
|
|
99
106
|
}
|
package/src/ipc/proc_pool.ts
CHANGED
package/src/job.ts
CHANGED
|
@@ -9,7 +9,7 @@ import type {
|
|
|
9
9
|
Room,
|
|
10
10
|
RtcConfiguration,
|
|
11
11
|
} from '@livekit/rtc-node';
|
|
12
|
-
import { RoomEvent, TrackKind } from '@livekit/rtc-node';
|
|
12
|
+
import { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';
|
|
13
13
|
import type { Logger } from 'pino';
|
|
14
14
|
import { log } from './log.js';
|
|
15
15
|
|
|
@@ -100,6 +100,42 @@ export class JobContext {
|
|
|
100
100
|
this.shutdownCallbacks.push(callback);
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
+
/**
 * Returns a remote participant that is not an agent, waiting for one to connect if none
 * is in the room yet.
 *
 * @param identity - when given, only a participant with this exact identity is accepted;
 *   otherwise the first non-agent participant is returned
 * @returns the matching remote participant
 * @throws Error if the room is not connected when this is called; the returned promise
 *   rejects if the room disconnects while waiting
 */
async waitForParticipant(identity?: string): Promise<RemoteParticipant> {
  if (!this.#room.isConnected) {
    throw new Error('room is not connected');
  }

  // single definition of "acceptable participant", shared by the scan and the listener
  const isMatch = (p: RemoteParticipant): boolean =>
    (!identity || p.identity === identity) && p.info.kind !== ParticipantKind.AGENT;

  // fast path: a suitable participant has already joined
  for (const p of this.#room.remoteParticipants.values()) {
    if (isMatch(p)) {
      return p;
    }
  }

  return new Promise((resolve, reject) => {
    const onParticipantConnected = (participant: RemoteParticipant) => {
      if (isMatch(participant)) {
        clearHandlers();
        resolve(participant);
      }
    };
    const onDisconnected = () => {
      clearHandlers();
      reject(new Error('Room disconnected while waiting for participant'));
    };

    // always detach both listeners together so neither outlives the promise
    const clearHandlers = () => {
      this.#room.off(RoomEvent.ParticipantConnected, onParticipantConnected);
      this.#room.off(RoomEvent.Disconnected, onDisconnected);
    };

    this.#room.on(RoomEvent.ParticipantConnected, onParticipantConnected);
    this.#room.on(RoomEvent.Disconnected, onDisconnected);
  });
}
|
|
138
|
+
|
|
103
139
|
/**
|
|
104
140
|
* Connects the agent to the room.
|
|
105
141
|
*
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
+
import { type AudioSource } from '@livekit/rtc-node';
|
|
6
|
+
import { EventEmitter } from 'events';
|
|
7
|
+
import { AudioByteStream } from '../audio.js';
|
|
8
|
+
import type { TranscriptionForwarder } from '../transcription.js';
|
|
9
|
+
import { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
|
|
10
|
+
|
|
11
|
+
// NOTE(review): empty object export; nothing in this file reads it — confirm its purpose before relying on it.
export const proto = {};
|
|
12
|
+
|
|
13
|
+
/**
 * Handle for a single playout started by `AgentPlayout.play`.
 *
 * It exposes progress (audio samples played, transcript characters forwarded), completion
 * state, and a way to interrupt the playout.
 */
export class PlayoutHandle extends EventEmitter {
  #audioSource: AudioSource;
  #sampleRate: number;
  #itemId: string;
  #contentIndex: number;
  /** @internal */
  transcriptionFwd: TranscriptionForwarder;
  /** @internal resolved by the playout task once it has finished */
  doneFut: Future;
  /** @internal resolved when {@link PlayoutHandle.interrupt} is called */
  intFut: Future;
  #interrupted: boolean;
  /** @internal sum of `samplesPerChannel / sampleRate` over every frame captured so far */
  pushedDuration: number;
  /** @internal */
  totalPlayedTime: number | undefined; // Set when playout is done

  /**
   * @param audioSource - source the audio is played into; its queued duration is used to
   *   estimate progress
   * @param sampleRate - sample rate used to convert played time into a sample count
   * @param itemId - identifier of the item being played
   * @param contentIndex - index of the content part being played
   * @param transcriptionFwd - forwarder receiving the text and audio of this playout
   */
  constructor(
    audioSource: AudioSource,
    sampleRate: number,
    itemId: string,
    contentIndex: number,
    transcriptionFwd: TranscriptionForwarder,
  ) {
    super();
    this.#audioSource = audioSource;
    this.#sampleRate = sampleRate;
    this.#itemId = itemId;
    this.#contentIndex = contentIndex;
    this.transcriptionFwd = transcriptionFwd;
    this.doneFut = new Future();
    this.intFut = new Future();
    this.#interrupted = false;
    this.pushedDuration = 0;
    this.totalPlayedTime = undefined;
  }

  get itemId(): string {
    return this.#itemId;
  }

  /**
   * Number of audio samples played so far.
   *
   * Once the playout is done this is derived from `totalPlayedTime`; before that it is
   * estimated as the pushed duration minus what is still queued in the audio source.
   */
  get audioSamples(): number {
    if (this.totalPlayedTime !== undefined) {
      return Math.floor(this.totalPlayedTime * this.#sampleRate);
    }

    // Subtract first, then scale: the difference is the played time, mirroring how
    // `totalPlayedTime` is computed when the playout finishes.
    // NOTE(review): assumes `pushedDuration` and `queuedDuration` use the same time unit — confirm.
    const playedSoFar = this.pushedDuration - this.#audioSource.queuedDuration;
    return Math.floor(playedSoFar * this.#sampleRate);
  }

  /** Current character index reported by the transcription forwarder. */
  get textChars(): number {
    return this.transcriptionFwd.currentCharacterIndex;
  }

  get contentIndex(): number {
    return this.#contentIndex;
  }

  get interrupted(): boolean {
    return this.#interrupted;
  }

  /** `true` once the playout has finished or has been interrupted. */
  get done(): boolean {
    return this.doneFut.done || this.#interrupted;
  }

  /** Requests that the playout stop; a no-op once the playout has already finished. */
  interrupt() {
    if (this.doneFut.done) return;
    this.intFut.resolve();
    this.#interrupted = true;
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Plays agent audio into an audio source while feeding the matching text and audio to a
 * transcription forwarder.
 *
 * Each call to {@link AgentPlayout.play} gracefully cancels the previously started playout
 * task before its own streams are consumed.
 */
export class AgentPlayout {
  #audioSource: AudioSource;
  #playoutTask: CancellablePromise<void> | null;
  #sampleRate: number;
  #numChannels: number;
  #inFrameSize: number;
  #outFrameSize: number;

  constructor(
    audioSource: AudioSource,
    sampleRate: number,
    numChannels: number,
    inFrameSize: number,
    outFrameSize: number,
  ) {
    this.#audioSource = audioSource;
    this.#playoutTask = null;
    this.#sampleRate = sampleRate;
    this.#numChannels = numChannels;
    this.#inFrameSize = inFrameSize;
    this.#outFrameSize = outFrameSize;
  }

  /**
   * Starts playing the given streams and returns a handle to observe or interrupt the playout.
   *
   * @param itemId - identifier of the item being played
   * @param contentIndex - index of the content part being played
   * @param transcriptionFwd - forwarder that receives the text and audio
   * @param textStream - text to push to the forwarder
   * @param audioStream - audio frames to capture into the audio source
   */
  play(
    itemId: string,
    contentIndex: number,
    transcriptionFwd: TranscriptionForwarder,
    textStream: AsyncIterableQueue<string>,
    audioStream: AsyncIterableQueue<AudioFrame>,
  ): PlayoutHandle {
    const handle = new PlayoutHandle(
      this.#audioSource,
      this.#sampleRate,
      itemId,
      contentIndex,
      transcriptionFwd,
    );
    const previousTask = this.#playoutTask;
    this.#playoutTask = this.#makePlayoutTask(previousTask, handle, textStream, audioStream);
    return handle;
  }

  /**
   * Builds the cancellable task that drives one playout: it cancels `oldTask`, then reads
   * text and captures audio concurrently until the audio finishes or the handle is
   * interrupted, and finally settles the handle and closes the forwarder.
   */
  #makePlayoutTask(
    oldTask: CancellablePromise<void> | null,
    handle: PlayoutHandle,
    textStream: AsyncIterableQueue<string>,
    audioStream: AsyncIterableQueue<AudioFrame>,
  ): CancellablePromise<void> {
    return new CancellablePromise<void>((resolve, reject, onCancel) => {
      let playoutCancelled = false;
      onCancel(() => {
        playoutCancelled = true;
      });

      const runPlayout = async (): Promise<void> => {
        try {
          if (oldTask) {
            await gracefullyCancel(oldTask);
          }

          // flips to true on the first audio frame, when the forwarder is started
          let audioStarted = false;

          // pushes every text chunk to the forwarder until the stream ends or a cancel is seen
          const startTextReader = () =>
            new CancellablePromise<void>((textDone, textFailed, onTextCancel) => {
              let textCancelled = false;
              onTextCancel(() => {
                textCancelled = true;
              });

              const pumpText = async (): Promise<void> => {
                try {
                  for await (const chunk of textStream) {
                    if (textCancelled || playoutCancelled) {
                      break;
                    }
                    handle.transcriptionFwd.pushText(chunk);
                  }
                  textDone();
                } catch (error) {
                  textFailed(error);
                }
              };
              void pumpText();
            });

          // re-frames incoming audio to the output frame size and captures it into the source
          const startCapture = () =>
            new CancellablePromise<void>((captureDone, captureFailed, onCaptureCancel) => {
              let captureCancelled = false;
              onCaptureCancel(() => {
                captureCancelled = true;
              });

              const pumpAudio = async (): Promise<void> => {
                try {
                  const byteStream = new AudioByteStream(
                    this.#sampleRate,
                    this.#numChannels,
                    this.#outFrameSize,
                  );

                  for await (const frame of audioStream) {
                    if (captureCancelled || playoutCancelled) {
                      break;
                    }
                    if (!audioStarted) {
                      handle.transcriptionFwd.start();
                      audioStarted = true;
                    }

                    handle.transcriptionFwd.pushAudio(frame);

                    for (const out of byteStream.write(frame.data.buffer)) {
                      handle.pushedDuration += out.samplesPerChannel / out.sampleRate;
                      await this.#audioSource.captureFrame(out);
                    }
                  }

                  const stoppedEarly = captureCancelled || playoutCancelled;
                  if (!stoppedEarly) {
                    // drain whatever is still buffered, then wait for the source to play it
                    for (const out of byteStream.flush()) {
                      handle.pushedDuration += out.samplesPerChannel / out.sampleRate;
                      await this.#audioSource.captureFrame(out);
                    }

                    handle.transcriptionFwd.markAudioComplete();

                    await this.#audioSource.waitForPlayout();
                  }

                  captureDone();
                } catch (error) {
                  captureFailed(error);
                }
              };
              void pumpAudio();
            });

          const textTask = startTextReader();
          const audioTask = startCapture();

          try {
            // whichever comes first: audio fully played, or the handle being interrupted
            await Promise.race([audioTask, handle.intFut.await]);
          } finally {
            if (!audioTask.isCancelled) {
              await gracefullyCancel(audioTask);
            }

            handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;

            if (handle.interrupted || audioTask.error) {
              this.#audioSource.clearQueue(); // make sure to remove any queued frames
            }

            if (!textTask.isCancelled) {
              await gracefullyCancel(textTask);
            }

            if (audioStarted && !handle.interrupted) {
              handle.transcriptionFwd.markTextComplete();
            }

            handle.doneFut.resolve();
            await handle.transcriptionFwd.close(handle.interrupted);
          }

          resolve();
        } catch (error) {
          reject(error);
        }
      };
      void runPlayout();
    });
  }
}
|