@livekit/agents 0.6.3 → 0.7.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/index.cjs +6 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/inference_runner.cjs +38 -0
- package/dist/inference_runner.cjs.map +1 -0
- package/dist/inference_runner.d.ts +11 -0
- package/dist/inference_runner.d.ts.map +1 -0
- package/dist/inference_runner.js +14 -0
- package/dist/inference_runner.js.map +1 -0
- package/dist/ipc/index.cjs +23 -0
- package/dist/ipc/index.cjs.map +1 -0
- package/dist/ipc/index.d.ts +2 -0
- package/dist/ipc/index.d.ts.map +1 -0
- package/dist/ipc/index.js +2 -0
- package/dist/ipc/index.js.map +1 -0
- package/dist/ipc/inference_executor.cjs +17 -0
- package/dist/ipc/inference_executor.cjs.map +1 -0
- package/dist/ipc/inference_executor.d.ts +4 -0
- package/dist/ipc/inference_executor.d.ts.map +1 -0
- package/dist/ipc/inference_executor.js +1 -0
- package/dist/ipc/inference_executor.js.map +1 -0
- package/dist/ipc/inference_proc_executor.cjs +97 -0
- package/dist/ipc/inference_proc_executor.cjs.map +1 -0
- package/dist/ipc/inference_proc_executor.d.ts +23 -0
- package/dist/ipc/inference_proc_executor.d.ts.map +1 -0
- package/dist/ipc/inference_proc_executor.js +72 -0
- package/dist/ipc/inference_proc_executor.js.map +1 -0
- package/dist/ipc/inference_proc_lazy_main.cjs +90 -0
- package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -0
- package/dist/ipc/inference_proc_lazy_main.d.ts +2 -0
- package/dist/ipc/inference_proc_lazy_main.d.ts.map +1 -0
- package/dist/ipc/inference_proc_lazy_main.js +67 -0
- package/dist/ipc/inference_proc_lazy_main.js.map +1 -0
- package/dist/ipc/job_executor.cjs +8 -7
- package/dist/ipc/job_executor.cjs.map +1 -1
- package/dist/ipc/job_executor.d.ts +14 -15
- package/dist/ipc/job_executor.d.ts.map +1 -1
- package/dist/ipc/job_executor.js +7 -6
- package/dist/ipc/job_executor.js.map +1 -1
- package/dist/ipc/job_proc_executor.cjs +108 -0
- package/dist/ipc/job_proc_executor.cjs.map +1 -0
- package/dist/ipc/job_proc_executor.d.ts +19 -0
- package/dist/ipc/job_proc_executor.d.ts.map +1 -0
- package/dist/ipc/job_proc_executor.js +83 -0
- package/dist/ipc/job_proc_executor.js.map +1 -0
- package/dist/ipc/{job_main.cjs → job_proc_lazy_main.cjs} +41 -36
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -0
- package/dist/ipc/job_proc_lazy_main.d.ts +2 -0
- package/dist/ipc/job_proc_lazy_main.d.ts.map +1 -0
- package/dist/ipc/{job_main.js → job_proc_lazy_main.js} +41 -11
- package/dist/ipc/job_proc_lazy_main.js.map +1 -0
- package/dist/ipc/message.cjs.map +1 -1
- package/dist/ipc/message.d.ts +17 -0
- package/dist/ipc/message.d.ts.map +1 -1
- package/dist/ipc/proc_pool.cjs +30 -4
- package/dist/ipc/proc_pool.cjs.map +1 -1
- package/dist/ipc/proc_pool.d.ts +5 -1
- package/dist/ipc/proc_pool.d.ts.map +1 -1
- package/dist/ipc/proc_pool.js +30 -4
- package/dist/ipc/proc_pool.js.map +1 -1
- package/dist/ipc/{proc_job_executor.cjs → supervised_proc.cjs} +58 -46
- package/dist/ipc/supervised_proc.cjs.map +1 -0
- package/dist/ipc/supervised_proc.d.ts +30 -0
- package/dist/ipc/supervised_proc.d.ts.map +1 -0
- package/dist/ipc/{proc_job_executor.js → supervised_proc.js} +54 -32
- package/dist/ipc/supervised_proc.js.map +1 -0
- package/dist/job.cjs +18 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.ts +9 -1
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +17 -1
- package/dist/job.js.map +1 -1
- package/dist/metrics/base.cjs +2 -2
- package/dist/metrics/base.cjs.map +1 -1
- package/dist/metrics/base.d.ts +1 -1
- package/dist/metrics/base.d.ts.map +1 -1
- package/dist/metrics/base.js +2 -2
- package/dist/metrics/base.js.map +1 -1
- package/dist/multimodal/agent_playout.cjs +13 -14
- package/dist/multimodal/agent_playout.cjs.map +1 -1
- package/dist/multimodal/agent_playout.d.ts +4 -4
- package/dist/multimodal/agent_playout.d.ts.map +1 -1
- package/dist/multimodal/agent_playout.js +13 -14
- package/dist/multimodal/agent_playout.js.map +1 -1
- package/dist/multimodal/multimodal_agent.cjs +12 -8
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +13 -9
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_output.cjs +20 -4
- package/dist/pipeline/agent_output.cjs.map +1 -1
- package/dist/pipeline/agent_output.d.ts +4 -2
- package/dist/pipeline/agent_output.d.ts.map +1 -1
- package/dist/pipeline/agent_output.js +20 -4
- package/dist/pipeline/agent_output.js.map +1 -1
- package/dist/pipeline/agent_playout.cjs +9 -3
- package/dist/pipeline/agent_playout.cjs.map +1 -1
- package/dist/pipeline/agent_playout.d.ts +4 -2
- package/dist/pipeline/agent_playout.d.ts.map +1 -1
- package/dist/pipeline/agent_playout.js +9 -3
- package/dist/pipeline/agent_playout.js.map +1 -1
- package/dist/pipeline/human_input.cjs +6 -0
- package/dist/pipeline/human_input.cjs.map +1 -1
- package/dist/pipeline/human_input.d.ts +3 -1
- package/dist/pipeline/human_input.d.ts.map +1 -1
- package/dist/pipeline/human_input.js +6 -0
- package/dist/pipeline/human_input.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +79 -12
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts +8 -0
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +79 -12
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +16 -4
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +16 -4
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/tokenize/basic/basic.cjs +2 -0
- package/dist/tokenize/basic/basic.cjs.map +1 -1
- package/dist/tokenize/basic/basic.d.ts +2 -0
- package/dist/tokenize/basic/basic.d.ts.map +1 -1
- package/dist/tokenize/basic/basic.js +1 -0
- package/dist/tokenize/basic/basic.js.map +1 -1
- package/dist/tokenize/basic/index.cjs +2 -0
- package/dist/tokenize/basic/index.cjs.map +1 -1
- package/dist/tokenize/basic/index.d.ts +1 -1
- package/dist/tokenize/basic/index.d.ts.map +1 -1
- package/dist/tokenize/basic/index.js +8 -1
- package/dist/tokenize/basic/index.js.map +1 -1
- package/dist/tokenize/token_stream.cjs +5 -3
- package/dist/tokenize/token_stream.cjs.map +1 -1
- package/dist/tokenize/token_stream.d.ts.map +1 -1
- package/dist/tokenize/token_stream.js +5 -3
- package/dist/tokenize/token_stream.js.map +1 -1
- package/dist/transcription.cjs +203 -86
- package/dist/transcription.cjs.map +1 -1
- package/dist/transcription.d.ts +24 -17
- package/dist/transcription.d.ts.map +1 -1
- package/dist/transcription.js +201 -85
- package/dist/transcription.js.map +1 -1
- package/dist/worker.cjs +42 -9
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts +5 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +42 -9
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
- package/src/index.ts +3 -1
- package/src/inference_runner.ts +19 -0
- package/src/ipc/index.ts +5 -0
- package/src/ipc/inference_executor.ts +7 -0
- package/src/ipc/inference_proc_executor.ts +93 -0
- package/src/ipc/inference_proc_lazy_main.ts +86 -0
- package/src/ipc/job_executor.ts +15 -17
- package/src/ipc/job_proc_executor.ts +112 -0
- package/src/ipc/{job_main.ts → job_proc_lazy_main.ts} +44 -14
- package/src/ipc/message.ts +14 -1
- package/src/ipc/proc_pool.ts +33 -3
- package/src/ipc/{proc_job_executor.ts → supervised_proc.ts} +80 -30
- package/src/job.ts +21 -0
- package/src/metrics/base.ts +7 -10
- package/src/multimodal/agent_playout.ts +14 -16
- package/src/multimodal/multimodal_agent.ts +13 -9
- package/src/pipeline/agent_output.ts +34 -5
- package/src/pipeline/agent_playout.ts +10 -1
- package/src/pipeline/human_input.ts +8 -0
- package/src/pipeline/pipeline_agent.ts +96 -11
- package/src/stt/stream_adapter.ts +17 -5
- package/src/tokenize/basic/basic.ts +2 -0
- package/src/tokenize/basic/index.ts +7 -1
- package/src/tokenize/token_stream.ts +6 -3
- package/src/transcription.ts +270 -96
- package/src/worker.ts +42 -5
- package/dist/ipc/job_main.cjs.map +0 -1
- package/dist/ipc/job_main.d.ts +0 -8
- package/dist/ipc/job_main.d.ts.map +0 -1
- package/dist/ipc/job_main.js.map +0 -1
- package/dist/ipc/proc_job_executor.cjs.map +0 -1
- package/dist/ipc/proc_job_executor.d.ts +0 -15
- package/dist/ipc/proc_job_executor.d.ts.map +0 -1
- package/dist/ipc/proc_job_executor.js.map +0 -1
|
@@ -6,31 +6,54 @@ import { once } from 'node:events';
|
|
|
6
6
|
import type { RunningJobInfo } from '../job.js';
|
|
7
7
|
import { log, loggerOptions } from '../log.js';
|
|
8
8
|
import { Future } from '../utils.js';
|
|
9
|
-
import type { ProcOpts } from './job_executor.js';
|
|
10
|
-
import { JobExecutor } from './job_executor.js';
|
|
11
9
|
import type { IPCMessage } from './message.js';
|
|
12
10
|
|
|
13
|
-
export
|
|
11
|
+
export interface ProcOpts {
|
|
12
|
+
initializeTimeout: number;
|
|
13
|
+
closeTimeout: number;
|
|
14
|
+
memoryWarnMB: number;
|
|
15
|
+
memoryLimitMB: number;
|
|
16
|
+
pingInterval: number;
|
|
17
|
+
pingTimeout: number;
|
|
18
|
+
highPingThreshold: number;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export abstract class SupervisedProc {
|
|
14
22
|
#opts: ProcOpts;
|
|
15
23
|
#started = false;
|
|
16
24
|
#closing = false;
|
|
17
25
|
#runningJob?: RunningJobInfo = undefined;
|
|
18
|
-
|
|
26
|
+
proc?: ChildProcess;
|
|
19
27
|
#pingInterval?: ReturnType<typeof setInterval>;
|
|
28
|
+
#memoryWatch?: ReturnType<typeof setInterval>;
|
|
20
29
|
#pongTimeout?: ReturnType<typeof setTimeout>;
|
|
21
|
-
|
|
30
|
+
protected init = new Future();
|
|
22
31
|
#join = new Future();
|
|
23
32
|
#logger = log().child({ runningJob: this.#runningJob });
|
|
24
33
|
|
|
25
|
-
constructor(
|
|
26
|
-
|
|
34
|
+
constructor(
|
|
35
|
+
initializeTimeout: number,
|
|
36
|
+
closeTimeout: number,
|
|
37
|
+
memoryWarnMB: number,
|
|
38
|
+
memoryLimitMB: number,
|
|
39
|
+
pingInterval: number,
|
|
40
|
+
pingTimeout: number,
|
|
41
|
+
highPingThreshold: number,
|
|
42
|
+
) {
|
|
27
43
|
this.#opts = {
|
|
28
|
-
agent,
|
|
29
44
|
initializeTimeout,
|
|
30
45
|
closeTimeout,
|
|
46
|
+
memoryWarnMB,
|
|
47
|
+
memoryLimitMB,
|
|
48
|
+
pingInterval,
|
|
49
|
+
pingTimeout,
|
|
50
|
+
highPingThreshold,
|
|
31
51
|
};
|
|
32
52
|
}
|
|
33
53
|
|
|
54
|
+
abstract createProcess(): ChildProcess;
|
|
55
|
+
abstract mainTask(child: ChildProcess): Promise<void>;
|
|
56
|
+
|
|
34
57
|
get started(): boolean {
|
|
35
58
|
return this.#started;
|
|
36
59
|
}
|
|
@@ -46,36 +69,50 @@ export class ProcJobExecutor extends JobExecutor {
|
|
|
46
69
|
throw new Error('runner is closed');
|
|
47
70
|
}
|
|
48
71
|
|
|
49
|
-
this
|
|
50
|
-
m.runProcess({
|
|
51
|
-
agentFile: this.#opts.agent,
|
|
52
|
-
}),
|
|
53
|
-
);
|
|
72
|
+
this.proc = this.createProcess();
|
|
54
73
|
|
|
55
74
|
this.#started = true;
|
|
56
75
|
this.run();
|
|
57
76
|
}
|
|
58
77
|
|
|
59
78
|
async run() {
|
|
60
|
-
await this
|
|
79
|
+
await this.init.await;
|
|
61
80
|
|
|
62
81
|
this.#pingInterval = setInterval(() => {
|
|
63
|
-
this
|
|
64
|
-
}, this.
|
|
82
|
+
this.proc!.send({ case: 'pingRequest', value: { timestamp: Date.now() } });
|
|
83
|
+
}, this.#opts.pingInterval);
|
|
65
84
|
|
|
66
85
|
this.#pongTimeout = setTimeout(() => {
|
|
67
86
|
this.#logger.warn('job is unresponsive');
|
|
68
87
|
clearTimeout(this.#pongTimeout);
|
|
69
88
|
clearInterval(this.#pingInterval);
|
|
70
|
-
this
|
|
89
|
+
this.proc!.kill();
|
|
71
90
|
this.#join.resolve();
|
|
72
|
-
}, this.
|
|
91
|
+
}, this.#opts.pingTimeout);
|
|
92
|
+
|
|
93
|
+
this.#memoryWatch = setInterval(() => {
|
|
94
|
+
const memoryMB = process.memoryUsage().heapUsed / (1024 * 1024);
|
|
95
|
+
if (this.#opts.memoryLimitMB > 0 && memoryMB > this.#opts.memoryLimitMB) {
|
|
96
|
+
this.#logger
|
|
97
|
+
.child({ memoryUsageMB: memoryMB, memoryLimitMB: this.#opts.memoryLimitMB })
|
|
98
|
+
.error('process exceeded memory limit, killing process');
|
|
99
|
+
this.close();
|
|
100
|
+
} else if (this.#opts.memoryWarnMB > 0 && memoryMB > this.#opts.memoryWarnMB) {
|
|
101
|
+
this.#logger
|
|
102
|
+
.child({
|
|
103
|
+
memoryUsageMB: memoryMB,
|
|
104
|
+
memoryWarnMB: this.#opts.memoryWarnMB,
|
|
105
|
+
memoryLimitMB: this.#opts.memoryLimitMB,
|
|
106
|
+
})
|
|
107
|
+
.error('process memory usage is high');
|
|
108
|
+
}
|
|
109
|
+
});
|
|
73
110
|
|
|
74
111
|
const listener = (msg: IPCMessage) => {
|
|
75
112
|
switch (msg.case) {
|
|
76
113
|
case 'pongResponse': {
|
|
77
114
|
const delay = Date.now() - msg.value.timestamp;
|
|
78
|
-
if (delay > this.
|
|
115
|
+
if (delay > this.#opts.highPingThreshold) {
|
|
79
116
|
this.#logger.child({ delay }).warn('job executor is unresponsive');
|
|
80
117
|
}
|
|
81
118
|
this.#pongTimeout?.refresh();
|
|
@@ -87,21 +124,26 @@ export class ProcJobExecutor extends JobExecutor {
|
|
|
87
124
|
}
|
|
88
125
|
case 'done': {
|
|
89
126
|
this.#closing = true;
|
|
90
|
-
this
|
|
127
|
+
this.proc!.off('message', listener);
|
|
91
128
|
this.#join.resolve();
|
|
92
129
|
break;
|
|
93
130
|
}
|
|
94
131
|
}
|
|
95
132
|
};
|
|
96
|
-
this
|
|
97
|
-
this
|
|
133
|
+
this.proc!.on('message', listener);
|
|
134
|
+
this.proc!.on('error', (err) => {
|
|
98
135
|
if (this.#closing) return;
|
|
99
|
-
this.#logger
|
|
136
|
+
this.#logger
|
|
137
|
+
.child({ err })
|
|
138
|
+
.warn('job process exited unexpectedly; this likely means the error above caused a crash');
|
|
100
139
|
clearTimeout(this.#pongTimeout);
|
|
101
140
|
clearInterval(this.#pingInterval);
|
|
141
|
+
clearInterval(this.#memoryWatch);
|
|
102
142
|
this.#join.resolve();
|
|
103
143
|
});
|
|
104
144
|
|
|
145
|
+
this.mainTask(this.proc!);
|
|
146
|
+
|
|
105
147
|
await this.#join.await;
|
|
106
148
|
}
|
|
107
149
|
|
|
@@ -116,17 +158,25 @@ export class ProcJobExecutor extends JobExecutor {
|
|
|
116
158
|
async initialize() {
|
|
117
159
|
const timer = setTimeout(() => {
|
|
118
160
|
const err = new Error('runner initialization timed out');
|
|
119
|
-
this
|
|
161
|
+
this.init.reject(err);
|
|
120
162
|
throw err;
|
|
121
163
|
}, this.#opts.initializeTimeout);
|
|
122
|
-
this
|
|
123
|
-
|
|
164
|
+
this.proc!.send({
|
|
165
|
+
case: 'initializeRequest',
|
|
166
|
+
value: {
|
|
167
|
+
loggerOptions,
|
|
168
|
+
pingInterval: this.#opts.pingInterval,
|
|
169
|
+
pingTimeout: this.#opts.pingTimeout,
|
|
170
|
+
highPingThreshold: this.#opts.highPingThreshold,
|
|
171
|
+
},
|
|
172
|
+
});
|
|
173
|
+
await once(this.proc!, 'message').then(([msg]: IPCMessage[]) => {
|
|
124
174
|
clearTimeout(timer);
|
|
125
175
|
if (msg!.case !== 'initializeResponse') {
|
|
126
176
|
throw new Error('first message must be InitializeResponse');
|
|
127
177
|
}
|
|
128
178
|
});
|
|
129
|
-
this
|
|
179
|
+
this.init.resolve();
|
|
130
180
|
}
|
|
131
181
|
|
|
132
182
|
async close() {
|
|
@@ -136,11 +186,11 @@ export class ProcJobExecutor extends JobExecutor {
|
|
|
136
186
|
this.#closing = true;
|
|
137
187
|
|
|
138
188
|
if (!this.#runningJob) {
|
|
139
|
-
this
|
|
189
|
+
this.proc!.kill();
|
|
140
190
|
this.#join.resolve();
|
|
141
191
|
}
|
|
142
192
|
|
|
143
|
-
this
|
|
193
|
+
this.proc!.send({ case: 'shutdownRequest' });
|
|
144
194
|
|
|
145
195
|
const timer = setTimeout(() => {
|
|
146
196
|
this.#logger.error('job shutdown is taking too much time');
|
|
@@ -157,6 +207,6 @@ export class ProcJobExecutor extends JobExecutor {
|
|
|
157
207
|
throw new Error('executor already has a running job');
|
|
158
208
|
}
|
|
159
209
|
this.#runningJob = info;
|
|
160
|
-
this
|
|
210
|
+
this.proc!.send({ case: 'startJobRequest', value: { runningJob: info } });
|
|
161
211
|
}
|
|
162
212
|
}
|
package/src/job.ts
CHANGED
|
@@ -11,8 +11,21 @@ import type {
|
|
|
11
11
|
} from '@livekit/rtc-node';
|
|
12
12
|
import { ParticipantKind, RoomEvent, TrackKind } from '@livekit/rtc-node';
|
|
13
13
|
import type { Logger } from 'pino';
|
|
14
|
+
import type { InferenceExecutor } from './ipc/inference_executor.js';
|
|
14
15
|
import { log } from './log.js';
|
|
15
16
|
|
|
17
|
+
export class CurrentJobContext {
|
|
18
|
+
static #current: JobContext;
|
|
19
|
+
|
|
20
|
+
constructor(proc: JobContext) {
|
|
21
|
+
CurrentJobContext.#current = proc;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
static getCurrent(): JobContext {
|
|
25
|
+
return CurrentJobContext.#current;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
16
29
|
/** Which tracks, if any, should the agent automatically subscribe to? */
|
|
17
30
|
export enum AutoSubscribe {
|
|
18
31
|
SUBSCRIBE_ALL,
|
|
@@ -60,6 +73,7 @@ export class JobContext {
|
|
|
60
73
|
};
|
|
61
74
|
} = {};
|
|
62
75
|
#logger: Logger;
|
|
76
|
+
#inferenceExecutor: InferenceExecutor;
|
|
63
77
|
|
|
64
78
|
constructor(
|
|
65
79
|
proc: JobProcess,
|
|
@@ -67,6 +81,7 @@ export class JobContext {
|
|
|
67
81
|
room: Room,
|
|
68
82
|
onConnect: () => void,
|
|
69
83
|
onShutdown: (s: string) => void,
|
|
84
|
+
inferenceExecutor: InferenceExecutor,
|
|
70
85
|
) {
|
|
71
86
|
this.#proc = proc;
|
|
72
87
|
this.#info = info;
|
|
@@ -76,6 +91,7 @@ export class JobContext {
|
|
|
76
91
|
this.onParticipantConnected = this.onParticipantConnected.bind(this);
|
|
77
92
|
this.#room.on(RoomEvent.ParticipantConnected, this.onParticipantConnected);
|
|
78
93
|
this.#logger = log().child({ info: this.#info });
|
|
94
|
+
this.#inferenceExecutor = inferenceExecutor;
|
|
79
95
|
}
|
|
80
96
|
|
|
81
97
|
get proc(): JobProcess {
|
|
@@ -96,6 +112,11 @@ export class JobContext {
|
|
|
96
112
|
return this.#room.localParticipant;
|
|
97
113
|
}
|
|
98
114
|
|
|
115
|
+
/** @returns The global inference executor */
|
|
116
|
+
get inferenceExecutor(): InferenceExecutor {
|
|
117
|
+
return this.#inferenceExecutor;
|
|
118
|
+
}
|
|
119
|
+
|
|
99
120
|
/** Adds a promise to be awaited when {@link JobContext.shutdown | shutdown} is called. */
|
|
100
121
|
addShutdownCallback(callback: () => Promise<void>) {
|
|
101
122
|
this.shutdownCallbacks.push(callback);
|
package/src/metrics/base.ts
CHANGED
|
@@ -86,16 +86,13 @@ export class MultimodalLLMError extends Error {
|
|
|
86
86
|
type?: string;
|
|
87
87
|
reason?: string;
|
|
88
88
|
code?: string;
|
|
89
|
-
constructor(
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
options?: ErrorOptions,
|
|
97
|
-
) {
|
|
98
|
-
super(message, options);
|
|
89
|
+
constructor({
|
|
90
|
+
type,
|
|
91
|
+
reason,
|
|
92
|
+
code,
|
|
93
|
+
message,
|
|
94
|
+
}: { type?: string; reason?: string; code?: string; message?: string } = {}) {
|
|
95
|
+
super(message);
|
|
99
96
|
this.type = type;
|
|
100
97
|
this.reason = reason;
|
|
101
98
|
this.code = code;
|
|
@@ -5,7 +5,7 @@ import type { AudioFrame } from '@livekit/rtc-node';
|
|
|
5
5
|
import { type AudioSource } from '@livekit/rtc-node';
|
|
6
6
|
import { EventEmitter } from 'node:events';
|
|
7
7
|
import { AudioByteStream } from '../audio.js';
|
|
8
|
-
import type {
|
|
8
|
+
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
9
9
|
import { type AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
|
|
10
10
|
|
|
11
11
|
export const proto = {};
|
|
@@ -16,7 +16,7 @@ export class PlayoutHandle extends EventEmitter {
|
|
|
16
16
|
#itemId: string;
|
|
17
17
|
#contentIndex: number;
|
|
18
18
|
/** @internal */
|
|
19
|
-
|
|
19
|
+
synchronizer: TextAudioSynchronizer;
|
|
20
20
|
/** @internal */
|
|
21
21
|
doneFut: Future;
|
|
22
22
|
/** @internal */
|
|
@@ -33,14 +33,14 @@ export class PlayoutHandle extends EventEmitter {
|
|
|
33
33
|
sampleRate: number,
|
|
34
34
|
itemId: string,
|
|
35
35
|
contentIndex: number,
|
|
36
|
-
|
|
36
|
+
synchronizer: TextAudioSynchronizer,
|
|
37
37
|
) {
|
|
38
38
|
super();
|
|
39
39
|
this.#audioSource = audioSource;
|
|
40
40
|
this.#sampleRate = sampleRate;
|
|
41
41
|
this.#itemId = itemId;
|
|
42
42
|
this.#contentIndex = contentIndex;
|
|
43
|
-
this.
|
|
43
|
+
this.synchronizer = synchronizer;
|
|
44
44
|
this.doneFut = new Future();
|
|
45
45
|
this.intFut = new Future();
|
|
46
46
|
this.#interrupted = false;
|
|
@@ -63,7 +63,7 @@ export class PlayoutHandle extends EventEmitter {
|
|
|
63
63
|
}
|
|
64
64
|
|
|
65
65
|
get textChars(): number {
|
|
66
|
-
return this.
|
|
66
|
+
return this.synchronizer.playedText.length;
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
get contentIndex(): number {
|
|
@@ -111,7 +111,7 @@ export class AgentPlayout extends EventEmitter {
|
|
|
111
111
|
play(
|
|
112
112
|
itemId: string,
|
|
113
113
|
contentIndex: number,
|
|
114
|
-
|
|
114
|
+
synchronizer: TextAudioSynchronizer,
|
|
115
115
|
textStream: AsyncIterableQueue<string>,
|
|
116
116
|
audioStream: AsyncIterableQueue<AudioFrame>,
|
|
117
117
|
): PlayoutHandle {
|
|
@@ -120,7 +120,7 @@ export class AgentPlayout extends EventEmitter {
|
|
|
120
120
|
this.#sampleRate,
|
|
121
121
|
itemId,
|
|
122
122
|
contentIndex,
|
|
123
|
-
|
|
123
|
+
synchronizer,
|
|
124
124
|
);
|
|
125
125
|
this.#playoutTask = this.#makePlayoutTask(this.#playoutTask, handle, textStream, audioStream);
|
|
126
126
|
return handle;
|
|
@@ -159,8 +159,9 @@ export class AgentPlayout extends EventEmitter {
|
|
|
159
159
|
if (cancelledText || cancelled) {
|
|
160
160
|
break;
|
|
161
161
|
}
|
|
162
|
-
handle.
|
|
162
|
+
handle.synchronizer.pushText(text);
|
|
163
163
|
}
|
|
164
|
+
handle.synchronizer.markTextSegmentEnd();
|
|
164
165
|
resolveText();
|
|
165
166
|
} catch (error) {
|
|
166
167
|
rejectText(error);
|
|
@@ -189,12 +190,12 @@ export class AgentPlayout extends EventEmitter {
|
|
|
189
190
|
break;
|
|
190
191
|
}
|
|
191
192
|
if (firstFrame) {
|
|
192
|
-
handle.
|
|
193
|
+
handle.synchronizer.segmentPlayoutStarted();
|
|
193
194
|
this.emit('playout_started');
|
|
194
195
|
firstFrame = false;
|
|
195
196
|
}
|
|
196
197
|
|
|
197
|
-
handle.
|
|
198
|
+
handle.synchronizer.pushAudio(frame);
|
|
198
199
|
|
|
199
200
|
for (const f of bstream.write(frame.data.buffer)) {
|
|
200
201
|
handle.pushedDuration += (f.samplesPerChannel / f.sampleRate) * 1000;
|
|
@@ -208,7 +209,7 @@ export class AgentPlayout extends EventEmitter {
|
|
|
208
209
|
await this.#audioSource.captureFrame(f);
|
|
209
210
|
}
|
|
210
211
|
|
|
211
|
-
handle.
|
|
212
|
+
handle.synchronizer.markAudioSegmentEnd();
|
|
212
213
|
|
|
213
214
|
await this.#audioSource.waitForPlayout();
|
|
214
215
|
}
|
|
@@ -233,6 +234,7 @@ export class AgentPlayout extends EventEmitter {
|
|
|
233
234
|
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
234
235
|
|
|
235
236
|
if (handle.interrupted || captureTask.error) {
|
|
237
|
+
await handle.synchronizer.close(true);
|
|
236
238
|
this.#audioSource.clearQueue(); // make sure to remove any queued frames
|
|
237
239
|
}
|
|
238
240
|
|
|
@@ -241,15 +243,11 @@ export class AgentPlayout extends EventEmitter {
|
|
|
241
243
|
}
|
|
242
244
|
|
|
243
245
|
if (!firstFrame) {
|
|
244
|
-
if (!handle.interrupted) {
|
|
245
|
-
handle.transcriptionFwd.markTextComplete();
|
|
246
|
-
}
|
|
247
|
-
|
|
248
246
|
this.emit('playout_stopped', handle.interrupted);
|
|
249
247
|
}
|
|
250
248
|
|
|
251
249
|
handle.doneFut.resolve();
|
|
252
|
-
await handle.
|
|
250
|
+
await handle.synchronizer.close(false);
|
|
253
251
|
}
|
|
254
252
|
|
|
255
253
|
resolve();
|
|
@@ -22,7 +22,7 @@ import { AudioByteStream } from '../audio.js';
|
|
|
22
22
|
import * as llm from '../llm/index.js';
|
|
23
23
|
import { log } from '../log.js';
|
|
24
24
|
import type { MultimodalLLMMetrics } from '../metrics/base.js';
|
|
25
|
-
import {
|
|
25
|
+
import { TextAudioSynchronizer, defaultTextSyncOptions } from '../transcription.js';
|
|
26
26
|
import { findMicroTrackId } from '../utils.js';
|
|
27
27
|
import { AgentPlayout, type PlayoutHandle } from './agent_playout.js';
|
|
28
28
|
|
|
@@ -190,7 +190,7 @@ export class MultimodalAgent extends EventEmitter {
|
|
|
190
190
|
this.emit('agent_stopped_speaking');
|
|
191
191
|
this.#speaking = false;
|
|
192
192
|
if (this.#playingHandle) {
|
|
193
|
-
let text = this.#playingHandle.
|
|
193
|
+
let text = this.#playingHandle.synchronizer.playedText;
|
|
194
194
|
if (interrupted) {
|
|
195
195
|
text += '…';
|
|
196
196
|
}
|
|
@@ -245,17 +245,21 @@ export class MultimodalAgent extends EventEmitter {
|
|
|
245
245
|
// openai.realtime.RealtimeContent
|
|
246
246
|
if (message.contentType === 'text') return;
|
|
247
247
|
|
|
248
|
-
const
|
|
249
|
-
|
|
250
|
-
this
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
248
|
+
const synchronizer = new TextAudioSynchronizer(defaultTextSyncOptions);
|
|
249
|
+
synchronizer.on('textUpdated', (text) => {
|
|
250
|
+
this.#publishTranscription(
|
|
251
|
+
this.room!.localParticipant!.identity!,
|
|
252
|
+
this.#getLocalTrackSid()!,
|
|
253
|
+
text.text,
|
|
254
|
+
text.final,
|
|
255
|
+
text.id,
|
|
256
|
+
);
|
|
257
|
+
});
|
|
254
258
|
|
|
255
259
|
const handle = this.#agentPlayout?.play(
|
|
256
260
|
message.itemId,
|
|
257
261
|
message.contentIndex,
|
|
258
|
-
|
|
262
|
+
synchronizer,
|
|
259
263
|
message.textStream,
|
|
260
264
|
message.audioStream,
|
|
261
265
|
);
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
5
|
import { log } from '../log.js';
|
|
6
|
+
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
6
7
|
import { SynthesizeStream, type TTS } from '../tts/index.js';
|
|
7
8
|
import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';
|
|
8
9
|
import type { AgentPlayout, PlayoutHandle } from './agent_playout.js';
|
|
@@ -21,12 +22,20 @@ export class SynthesisHandle {
|
|
|
21
22
|
#playHandle?: PlayoutHandle;
|
|
22
23
|
intFut = new Future();
|
|
23
24
|
#logger = log();
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
synchronizer: TextAudioSynchronizer;
|
|
26
|
+
|
|
27
|
+
constructor(
|
|
28
|
+
speechId: string,
|
|
29
|
+
ttsSource: SpeechSource,
|
|
30
|
+
agentPlayout: AgentPlayout,
|
|
31
|
+
tts: TTS,
|
|
32
|
+
synchronizer: TextAudioSynchronizer,
|
|
33
|
+
) {
|
|
26
34
|
this.#speechId = speechId;
|
|
27
35
|
this.ttsSource = ttsSource;
|
|
28
36
|
this.#agentPlayout = agentPlayout;
|
|
29
37
|
this.tts = tts;
|
|
38
|
+
this.synchronizer = synchronizer;
|
|
30
39
|
}
|
|
31
40
|
|
|
32
41
|
get speechId(): string {
|
|
@@ -51,7 +60,7 @@ export class SynthesisHandle {
|
|
|
51
60
|
throw new Error('synthesis was interrupted');
|
|
52
61
|
}
|
|
53
62
|
|
|
54
|
-
this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);
|
|
63
|
+
this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue, this.synchronizer);
|
|
55
64
|
return this.#playHandle;
|
|
56
65
|
}
|
|
57
66
|
|
|
@@ -86,8 +95,18 @@ export class AgentOutput {
|
|
|
86
95
|
await Promise.all(this.#tasks);
|
|
87
96
|
}
|
|
88
97
|
|
|
89
|
-
synthesize(
|
|
90
|
-
|
|
98
|
+
synthesize(
|
|
99
|
+
speechId: string,
|
|
100
|
+
ttsSource: SpeechSource,
|
|
101
|
+
synchronizer: TextAudioSynchronizer,
|
|
102
|
+
): SynthesisHandle {
|
|
103
|
+
const handle = new SynthesisHandle(
|
|
104
|
+
speechId,
|
|
105
|
+
ttsSource,
|
|
106
|
+
this.#agentPlayout,
|
|
107
|
+
this.#tts,
|
|
108
|
+
synchronizer,
|
|
109
|
+
);
|
|
91
110
|
const task = this.#synthesize(handle);
|
|
92
111
|
this.#tasks.push(task);
|
|
93
112
|
task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));
|
|
@@ -136,6 +155,8 @@ const stringSynthesisTask = (text: string, handle: SynthesisHandle): Cancellable
|
|
|
136
155
|
|
|
137
156
|
const ttsStream = handle.tts.stream();
|
|
138
157
|
ttsStream.pushText(text);
|
|
158
|
+
handle.synchronizer.pushText(text);
|
|
159
|
+
handle.synchronizer.markTextSegmentEnd();
|
|
139
160
|
ttsStream.flush();
|
|
140
161
|
ttsStream.endInput();
|
|
141
162
|
for await (const audio of ttsStream) {
|
|
@@ -178,8 +199,16 @@ const streamSynthesisTask = (
|
|
|
178
199
|
for await (const text of stream) {
|
|
179
200
|
fullText += text;
|
|
180
201
|
if (cancelled) break;
|
|
202
|
+
handle.synchronizer.pushText(text);
|
|
181
203
|
ttsStream.pushText(text);
|
|
182
204
|
}
|
|
205
|
+
handle.synchronizer.markTextSegmentEnd();
|
|
206
|
+
|
|
207
|
+
// end the audio queue early if there is no actual text to turn into speech
|
|
208
|
+
if (!fullText || fullText.trim().length === 0) {
|
|
209
|
+
cancelled = true;
|
|
210
|
+
handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
|
|
211
|
+
}
|
|
183
212
|
ttsStream.flush();
|
|
184
213
|
ttsStream.endInput();
|
|
185
214
|
|
|
@@ -5,6 +5,7 @@ import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
|
|
|
5
5
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
6
6
|
import EventEmitter from 'node:events';
|
|
7
7
|
import { log } from '../log.js';
|
|
8
|
+
import type { TextAudioSynchronizer } from '../transcription.js';
|
|
8
9
|
import { CancellablePromise, Future, gracefullyCancel } from '../utils.js';
|
|
9
10
|
import { SynthesisHandle } from './agent_output.js';
|
|
10
11
|
|
|
@@ -23,6 +24,7 @@ export class PlayoutHandle {
|
|
|
23
24
|
#audioSource: AudioSource;
|
|
24
25
|
playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
|
|
25
26
|
totalPlayedTime?: number;
|
|
27
|
+
synchronizer: TextAudioSynchronizer;
|
|
26
28
|
#interrupted = false;
|
|
27
29
|
pushedDuration = 0;
|
|
28
30
|
intFut = new Future();
|
|
@@ -32,10 +34,12 @@ export class PlayoutHandle {
|
|
|
32
34
|
speechId: string,
|
|
33
35
|
audioSource: AudioSource,
|
|
34
36
|
playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,
|
|
37
|
+
synchronizer: TextAudioSynchronizer,
|
|
35
38
|
) {
|
|
36
39
|
this.#speechId = speechId;
|
|
37
40
|
this.#audioSource = audioSource;
|
|
38
41
|
this.playoutSource = playoutSource;
|
|
42
|
+
this.synchronizer = synchronizer;
|
|
39
43
|
}
|
|
40
44
|
|
|
41
45
|
get speechId(): string {
|
|
@@ -91,12 +95,13 @@ export class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentP
|
|
|
91
95
|
play(
|
|
92
96
|
speechId: string,
|
|
93
97
|
playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,
|
|
98
|
+
synchronizer: TextAudioSynchronizer,
|
|
94
99
|
): PlayoutHandle {
|
|
95
100
|
if (this.#closed) {
|
|
96
101
|
throw new Error('source closed');
|
|
97
102
|
}
|
|
98
103
|
|
|
99
|
-
const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource);
|
|
104
|
+
const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);
|
|
100
105
|
|
|
101
106
|
this.#playoutTask = this.#playout(handle, this.#playoutTask);
|
|
102
107
|
return handle;
|
|
@@ -109,6 +114,7 @@ export class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentP
|
|
|
109
114
|
handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
|
|
110
115
|
|
|
111
116
|
if (handle.interrupted || captureTask.error) {
|
|
117
|
+
handle.synchronizer.close(true);
|
|
112
118
|
this.#audioSource.clearQueue(); // make sure to remove any queued frames
|
|
113
119
|
}
|
|
114
120
|
|
|
@@ -156,9 +162,11 @@ export class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentP
|
|
|
156
162
|
.child({ speechId: handle.speechId })
|
|
157
163
|
.debug('started playing the first time');
|
|
158
164
|
this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);
|
|
165
|
+
handle.synchronizer.segmentPlayoutStarted();
|
|
159
166
|
firstFrame = false;
|
|
160
167
|
}
|
|
161
168
|
handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;
|
|
169
|
+
handle.synchronizer.pushAudio(frame);
|
|
162
170
|
await this.#audioSource.captureFrame(frame);
|
|
163
171
|
await this.#audioSource.waitForPlayout();
|
|
164
172
|
}
|
|
@@ -170,6 +178,7 @@ export class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentP
|
|
|
170
178
|
// await this.#audioSource.waitForPlayout();
|
|
171
179
|
// }
|
|
172
180
|
|
|
181
|
+
handle.synchronizer.close(false);
|
|
173
182
|
resolve();
|
|
174
183
|
});
|
|
175
184
|
|
|
@@ -57,6 +57,14 @@ export class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInp
|
|
|
57
57
|
this.#subscribeToMicrophone();
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
get participant(): RemoteParticipant {
|
|
61
|
+
return this.#participant;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
get subscribedTrack(): RemoteAudioTrack | undefined {
|
|
65
|
+
return this.#subscribedTrack;
|
|
66
|
+
}
|
|
67
|
+
|
|
60
68
|
#subscribeToMicrophone(): void {
|
|
61
69
|
if (!this.#participant) {
|
|
62
70
|
this.#logger.error('Participant is not set');
|