@livekit/agents 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +47 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.ts +15 -2
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +46 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/metrics/base.cjs +44 -0
- package/dist/metrics/base.cjs.map +1 -0
- package/dist/metrics/base.d.ts +96 -0
- package/dist/metrics/base.d.ts.map +1 -0
- package/dist/metrics/base.js +20 -0
- package/dist/metrics/base.js.map +1 -0
- package/dist/metrics/index.cjs +35 -0
- package/dist/metrics/index.cjs.map +1 -0
- package/dist/metrics/index.d.ts +5 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +9 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/metrics/usage_collector.cjs +53 -0
- package/dist/metrics/usage_collector.cjs.map +1 -0
- package/dist/metrics/usage_collector.d.ts +14 -0
- package/dist/metrics/usage_collector.d.ts.map +1 -0
- package/dist/metrics/usage_collector.js +29 -0
- package/dist/metrics/usage_collector.js.map +1 -0
- package/dist/metrics/utils.cjs +104 -0
- package/dist/metrics/utils.cjs.map +1 -0
- package/dist/metrics/utils.d.ts +10 -0
- package/dist/metrics/utils.d.ts.map +1 -0
- package/dist/metrics/utils.js +73 -0
- package/dist/metrics/utils.js.map +1 -0
- package/dist/multimodal/multimodal_agent.cjs +7 -13
- package/dist/multimodal/multimodal_agent.cjs.map +1 -1
- package/dist/multimodal/multimodal_agent.d.ts +1 -4
- package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
- package/dist/multimodal/multimodal_agent.js +7 -13
- package/dist/multimodal/multimodal_agent.js.map +1 -1
- package/dist/pipeline/agent_output.cjs +9 -2
- package/dist/pipeline/agent_output.cjs.map +1 -1
- package/dist/pipeline/agent_output.d.ts +1 -0
- package/dist/pipeline/agent_output.d.ts.map +1 -1
- package/dist/pipeline/agent_output.js +9 -2
- package/dist/pipeline/agent_output.js.map +1 -1
- package/dist/pipeline/index.cjs +2 -0
- package/dist/pipeline/index.cjs.map +1 -1
- package/dist/pipeline/index.d.ts +1 -1
- package/dist/pipeline/index.d.ts.map +1 -1
- package/dist/pipeline/index.js +3 -1
- package/dist/pipeline/index.js.map +1 -1
- package/dist/pipeline/pipeline_agent.cjs +168 -70
- package/dist/pipeline/pipeline_agent.cjs.map +1 -1
- package/dist/pipeline/pipeline_agent.d.ts +10 -4
- package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
- package/dist/pipeline/pipeline_agent.js +171 -73
- package/dist/pipeline/pipeline_agent.js.map +1 -1
- package/dist/pipeline/speech_handle.cjs +49 -1
- package/dist/pipeline/speech_handle.cjs.map +1 -1
- package/dist/pipeline/speech_handle.d.ts +12 -2
- package/dist/pipeline/speech_handle.d.ts.map +1 -1
- package/dist/pipeline/speech_handle.js +50 -2
- package/dist/pipeline/speech_handle.js.map +1 -1
- package/dist/stt/index.cjs.map +1 -1
- package/dist/stt/index.d.ts +1 -1
- package/dist/stt/index.d.ts.map +1 -1
- package/dist/stt/index.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +15 -5
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.ts +4 -1
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +15 -5
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +46 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.ts +25 -3
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +46 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/index.cjs +4 -2
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.ts +1 -1
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +3 -1
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +14 -3
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.ts +3 -0
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +15 -4
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +109 -6
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts +24 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +107 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/vad.cjs +43 -2
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.ts +21 -4
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +43 -2
- package/dist/vad.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +2 -1
- package/src/llm/index.ts +2 -0
- package/src/llm/llm.ts +55 -3
- package/src/metrics/base.ts +127 -0
- package/src/metrics/index.ts +20 -0
- package/src/metrics/usage_collector.ts +40 -0
- package/src/metrics/utils.ts +100 -0
- package/src/multimodal/multimodal_agent.ts +12 -17
- package/src/pipeline/agent_output.ts +14 -7
- package/src/pipeline/index.ts +1 -1
- package/src/pipeline/pipeline_agent.ts +210 -95
- package/src/pipeline/speech_handle.ts +67 -2
- package/src/stt/index.ts +2 -0
- package/src/stt/stream_adapter.ts +17 -5
- package/src/stt/stt.ts +67 -3
- package/src/tts/index.ts +2 -0
- package/src/tts/stream_adapter.ts +17 -4
- package/src/tts/tts.ts +127 -4
- package/src/vad.ts +61 -4
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { log } from '../log.js';
|
|
5
|
+
import type {
|
|
6
|
+
AgentMetrics,
|
|
7
|
+
LLMMetrics,
|
|
8
|
+
PipelineEOUMetrics,
|
|
9
|
+
PipelineLLMMetrics,
|
|
10
|
+
PipelineTTSMetrics,
|
|
11
|
+
STTMetrics,
|
|
12
|
+
TTSMetrics,
|
|
13
|
+
VADMetrics,
|
|
14
|
+
} from './base.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Emits one structured `info` log entry describing the given metrics event.
 *
 * The metrics object is classified with the type guards from this module and
 * the fields relevant to that kind are attached to a child logger before the
 * message is written. Pipeline-specific variants are tested before their base
 * variants so that `sequenceId` is included whenever it is available.
 *
 * Metrics that satisfy none of the guards below are ignored without logging.
 *
 * @param metrics - the metrics event to log
 */
export const logMetrics = (metrics: AgentMetrics) => {
  const logger = log();

  if (isPipelineLLMMetrics(metrics)) {
    const { sequenceId, ttft, promptTokens, completionTokens, tokensPerSecond } = metrics;
    logger
      .child({
        sequenceId,
        ttft,
        inputTokens: promptTokens,
        outputTokens: completionTokens,
        tokensPerSecond,
      })
      .info('Pipeline LLM metrics');
    return;
  }

  if (isLLMMetrics(metrics)) {
    const { ttft, promptTokens, completionTokens, tokensPerSecond } = metrics;
    logger
      .child({
        ttft,
        inputTokens: promptTokens,
        outputTokens: completionTokens,
        tokensPerSecond,
      })
      .info('LLM metrics');
    return;
  }

  if (isPipelineTTSMetrics(metrics)) {
    const { sequenceId, ttfb, audioDuration } = metrics;
    logger.child({ sequenceId, ttfb, audioDuration }).info('Pipeline TTS metrics');
    return;
  }

  if (isTTSMetrics(metrics)) {
    const { ttfb, audioDuration } = metrics;
    logger.child({ ttfb, audioDuration }).info('TTS metrics');
    return;
  }

  if (isPipelineEOUMetrics(metrics)) {
    const { sequenceId, endOfUtteranceDelay, transcriptionDelay } = metrics;
    logger
      .child({ sequenceId, endOfUtteranceDelay, transcriptionDelay })
      .info('Pipeline EOU metrics');
    return;
  }

  if (isSTTMetrics(metrics)) {
    const { audioDuration } = metrics;
    logger.child({ audioDuration }).info('STT metrics');
  }
};
|
|
68
|
+
|
|
69
|
+
/**
 * Type guard: reports whether `metrics` carries LLM metrics.
 *
 * Detection is based on the presence of the `ttft` field. A presence check is
 * used instead of a truthiness check so that a `ttft` of exactly `0` is still
 * recognised as LLM metrics rather than falling through to another category.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when `ttft` is set (neither `undefined` nor `null`)
 */
export const isLLMMetrics = (metrics: AgentMetrics): metrics is LLMMetrics => {
  const ttft = (metrics as LLMMetrics).ttft;
  return ttft !== undefined && ttft !== null;
};
|
|
72
|
+
|
|
73
|
+
/**
 * Type guard: reports whether `metrics` carries pipeline-level LLM metrics,
 * i.e. LLM metrics that additionally have a `sequenceId`.
 *
 * The `sequenceId` is tested for presence rather than truthiness so that a
 * falsy identifier (such as `0` or an empty string) does not demote the event
 * to plain LLM metrics.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when `metrics` passes `isLLMMetrics` and has a `sequenceId`
 */
export const isPipelineLLMMetrics = (metrics: AgentMetrics): metrics is PipelineLLMMetrics => {
  if (!isLLMMetrics(metrics)) {
    return false;
  }
  const sequenceId = (metrics as PipelineLLMMetrics).sequenceId;
  return sequenceId !== undefined && sequenceId !== null;
};
|
|
76
|
+
|
|
77
|
+
/**
 * Type guard: reports whether `metrics` carries VAD metrics.
 *
 * Detection is based on the presence of the `inferenceCount` field. A presence
 * check is used instead of a truthiness check so that an `inferenceCount` of
 * `0` is still recognised as VAD metrics.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when `inferenceCount` is set (neither `undefined` nor `null`)
 */
export const isVADMetrics = (metrics: AgentMetrics): metrics is VADMetrics => {
  const inferenceCount = (metrics as VADMetrics).inferenceCount;
  return inferenceCount !== undefined && inferenceCount !== null;
};
|
|
80
|
+
|
|
81
|
+
/**
 * Type guard: reports whether `metrics` carries pipeline end-of-utterance
 * metrics.
 *
 * Detection is based on the presence of the `endOfUtteranceDelay` field. A
 * presence check is used instead of a truthiness check so that a delay of
 * exactly `0` is still recognised.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when `endOfUtteranceDelay` is set (neither `undefined` nor `null`)
 */
export const isPipelineEOUMetrics = (metrics: AgentMetrics): metrics is PipelineEOUMetrics => {
  const endOfUtteranceDelay = (metrics as PipelineEOUMetrics).endOfUtteranceDelay;
  return endOfUtteranceDelay !== undefined && endOfUtteranceDelay !== null;
};
|
|
84
|
+
|
|
85
|
+
/**
 * Type guard: reports whether `metrics` carries TTS metrics.
 *
 * Detection is based on the presence of the `ttfb` field. A presence check is
 * used instead of a truthiness check so that a `ttfb` of exactly `0` is still
 * recognised as TTS metrics.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when `ttfb` is set (neither `undefined` nor `null`)
 */
export const isTTSMetrics = (metrics: AgentMetrics): metrics is TTSMetrics => {
  const ttfb = (metrics as TTSMetrics).ttfb;
  return ttfb !== undefined && ttfb !== null;
};
|
|
88
|
+
|
|
89
|
+
/**
 * Type guard: reports whether `metrics` carries pipeline-level TTS metrics,
 * i.e. TTS metrics that additionally have a `sequenceId`.
 *
 * The `sequenceId` is tested for presence rather than truthiness so that a
 * falsy identifier (such as `0` or an empty string) does not demote the event
 * to plain TTS metrics.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when `metrics` passes `isTTSMetrics` and has a `sequenceId`
 */
export const isPipelineTTSMetrics = (metrics: AgentMetrics): metrics is PipelineTTSMetrics => {
  if (!isTTSMetrics(metrics)) {
    return false;
  }
  const sequenceId = (metrics as PipelineTTSMetrics).sequenceId;
  return sequenceId !== undefined && sequenceId !== null;
};
|
|
92
|
+
|
|
93
|
+
/**
 * Type guard: reports whether `metrics` carries STT metrics.
 *
 * STT metrics are identified by elimination: the event is treated as STT
 * metrics when it is not recognised as LLM, VAD, pipeline end-of-utterance or
 * TTS metrics. The guards are evaluated in that order and evaluation stops at
 * the first one that matches.
 *
 * @param metrics - the metrics event to classify
 * @returns `true` when none of the other metrics guards match
 */
export const isSTTMetrics = (metrics: AgentMetrics): metrics is STTMetrics => {
  const matchesNoOtherKind =
    !isLLMMetrics(metrics) &&
    !isVADMetrics(metrics) &&
    !isPipelineEOUMetrics(metrics) &&
    !isTTSMetrics(metrics);
  return matchesNoOtherKind;
};
|
|
@@ -21,6 +21,7 @@ import { EventEmitter } from 'node:events';
|
|
|
21
21
|
import { AudioByteStream } from '../audio.js';
|
|
22
22
|
import * as llm from '../llm/index.js';
|
|
23
23
|
import { log } from '../log.js';
|
|
24
|
+
import type { MultimodalLLMMetrics } from '../metrics/base.js';
|
|
24
25
|
import { BasicTranscriptionForwarder } from '../transcription.js';
|
|
25
26
|
import { findMicroTrackId } from '../utils.js';
|
|
26
27
|
import { AgentPlayout, type PlayoutHandle } from './agent_playout.js';
|
|
@@ -60,7 +61,7 @@ export class MultimodalAgent extends EventEmitter {
|
|
|
60
61
|
room: Room | null = null;
|
|
61
62
|
linkedParticipant: RemoteParticipant | null = null;
|
|
62
63
|
subscribedTrack: RemoteAudioTrack | null = null;
|
|
63
|
-
readMicroTask:
|
|
64
|
+
readMicroTask: Promise<void> | null = null;
|
|
64
65
|
|
|
65
66
|
constructor({
|
|
66
67
|
model,
|
|
@@ -284,6 +285,7 @@ export class MultimodalAgent extends EventEmitter {
|
|
|
284
285
|
});
|
|
285
286
|
|
|
286
287
|
this.#session.on('input_speech_started', (ev: any) => {
|
|
288
|
+
this.emit('user_started_speaking');
|
|
287
289
|
if (this.#playingHandle && !this.#playingHandle.done) {
|
|
288
290
|
this.#playingHandle.interrupt();
|
|
289
291
|
|
|
@@ -326,6 +328,10 @@ export class MultimodalAgent extends EventEmitter {
|
|
|
326
328
|
this.#updateState();
|
|
327
329
|
});
|
|
328
330
|
|
|
331
|
+
this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {
|
|
332
|
+
this.emit('metrics_collected', metrics);
|
|
333
|
+
});
|
|
334
|
+
|
|
329
335
|
resolve(this.#session);
|
|
330
336
|
});
|
|
331
337
|
}
|
|
@@ -404,22 +410,11 @@ export class MultimodalAgent extends EventEmitter {
|
|
|
404
410
|
};
|
|
405
411
|
this.subscribedTrack = track;
|
|
406
412
|
|
|
407
|
-
|
|
408
|
-
this.
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
this.readMicroTask = {
|
|
413
|
-
promise: new Promise<void>((resolve, reject) => {
|
|
414
|
-
cancel = () => {
|
|
415
|
-
reject(new Error('Task cancelled'));
|
|
416
|
-
};
|
|
417
|
-
readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))
|
|
418
|
-
.then(resolve)
|
|
419
|
-
.catch(reject);
|
|
420
|
-
}),
|
|
421
|
-
cancel: () => cancel(),
|
|
422
|
-
};
|
|
413
|
+
this.readMicroTask = new Promise<void>((resolve, reject) => {
|
|
414
|
+
readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))
|
|
415
|
+
.then(resolve)
|
|
416
|
+
.catch(reject);
|
|
417
|
+
});
|
|
423
418
|
}
|
|
424
419
|
|
|
425
420
|
#getLocalTrackSid(): string | null {
|
|
@@ -13,6 +13,7 @@ export class SynthesisHandle {
|
|
|
13
13
|
static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
|
|
14
14
|
|
|
15
15
|
#speechId: string;
|
|
16
|
+
text?: string;
|
|
16
17
|
ttsSource: SpeechSource;
|
|
17
18
|
#agentPlayout: AgentPlayout;
|
|
18
19
|
tts: TTS;
|
|
@@ -97,7 +98,7 @@ export class AgentOutput {
|
|
|
97
98
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
98
99
|
return new CancellablePromise(async (resolve, _, onCancel) => {
|
|
99
100
|
const ttsSource = await handle.ttsSource;
|
|
100
|
-
let task: CancellablePromise<
|
|
101
|
+
let task: CancellablePromise<string>;
|
|
101
102
|
if (typeof ttsSource === 'string') {
|
|
102
103
|
task = stringSynthesisTask(ttsSource, handle);
|
|
103
104
|
} else {
|
|
@@ -113,6 +114,10 @@ export class AgentOutput {
|
|
|
113
114
|
} finally {
|
|
114
115
|
if (handle.intFut.done) {
|
|
115
116
|
gracefullyCancel(task);
|
|
117
|
+
} else {
|
|
118
|
+
task.then((text) => {
|
|
119
|
+
handle.text = text;
|
|
120
|
+
});
|
|
116
121
|
}
|
|
117
122
|
}
|
|
118
123
|
|
|
@@ -121,9 +126,9 @@ export class AgentOutput {
|
|
|
121
126
|
}
|
|
122
127
|
}
|
|
123
128
|
|
|
124
|
-
const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<
|
|
129
|
+
const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {
|
|
125
130
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
126
|
-
return new CancellablePromise
|
|
131
|
+
return new CancellablePromise(async (resolve, _, onCancel) => {
|
|
127
132
|
let cancelled = false;
|
|
128
133
|
onCancel(() => {
|
|
129
134
|
cancelled = true;
|
|
@@ -141,16 +146,17 @@ const stringSynthesisTask = (text: string, handle: SynthesisHandle): Cancellable
|
|
|
141
146
|
}
|
|
142
147
|
handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
|
|
143
148
|
|
|
144
|
-
resolve();
|
|
149
|
+
resolve(text);
|
|
145
150
|
});
|
|
146
151
|
};
|
|
147
152
|
|
|
148
153
|
const streamSynthesisTask = (
|
|
149
154
|
stream: AsyncIterable<string>,
|
|
150
155
|
handle: SynthesisHandle,
|
|
151
|
-
): CancellablePromise<
|
|
156
|
+
): CancellablePromise<string> => {
|
|
152
157
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
153
|
-
return new CancellablePromise
|
|
158
|
+
return new CancellablePromise(async (resolve, _, onCancel) => {
|
|
159
|
+
let fullText = '';
|
|
154
160
|
let cancelled = false;
|
|
155
161
|
onCancel(() => {
|
|
156
162
|
cancelled = true;
|
|
@@ -170,12 +176,13 @@ const streamSynthesisTask = (
|
|
|
170
176
|
readGeneratedAudio();
|
|
171
177
|
|
|
172
178
|
for await (const text of stream) {
|
|
179
|
+
fullText += text;
|
|
173
180
|
if (cancelled) break;
|
|
174
181
|
ttsStream.pushText(text);
|
|
175
182
|
}
|
|
176
183
|
ttsStream.flush();
|
|
177
184
|
ttsStream.endInput();
|
|
178
185
|
|
|
179
|
-
resolve();
|
|
186
|
+
resolve(fullText);
|
|
180
187
|
});
|
|
181
188
|
};
|
package/src/pipeline/index.ts
CHANGED