@livekit/agents 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.ts +2 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +2 -0
  6. package/dist/index.js.map +1 -1
  7. package/dist/llm/index.cjs +2 -0
  8. package/dist/llm/index.cjs.map +1 -1
  9. package/dist/llm/index.d.ts +1 -1
  10. package/dist/llm/index.d.ts.map +1 -1
  11. package/dist/llm/index.js +2 -0
  12. package/dist/llm/index.js.map +1 -1
  13. package/dist/llm/llm.cjs +47 -3
  14. package/dist/llm/llm.cjs.map +1 -1
  15. package/dist/llm/llm.d.ts +15 -2
  16. package/dist/llm/llm.d.ts.map +1 -1
  17. package/dist/llm/llm.js +46 -3
  18. package/dist/llm/llm.js.map +1 -1
  19. package/dist/metrics/base.cjs +44 -0
  20. package/dist/metrics/base.cjs.map +1 -0
  21. package/dist/metrics/base.d.ts +96 -0
  22. package/dist/metrics/base.d.ts.map +1 -0
  23. package/dist/metrics/base.js +20 -0
  24. package/dist/metrics/base.js.map +1 -0
  25. package/dist/metrics/index.cjs +35 -0
  26. package/dist/metrics/index.cjs.map +1 -0
  27. package/dist/metrics/index.d.ts +5 -0
  28. package/dist/metrics/index.d.ts.map +1 -0
  29. package/dist/metrics/index.js +9 -0
  30. package/dist/metrics/index.js.map +1 -0
  31. package/dist/metrics/usage_collector.cjs +53 -0
  32. package/dist/metrics/usage_collector.cjs.map +1 -0
  33. package/dist/metrics/usage_collector.d.ts +14 -0
  34. package/dist/metrics/usage_collector.d.ts.map +1 -0
  35. package/dist/metrics/usage_collector.js +29 -0
  36. package/dist/metrics/usage_collector.js.map +1 -0
  37. package/dist/metrics/utils.cjs +104 -0
  38. package/dist/metrics/utils.cjs.map +1 -0
  39. package/dist/metrics/utils.d.ts +10 -0
  40. package/dist/metrics/utils.d.ts.map +1 -0
  41. package/dist/metrics/utils.js +73 -0
  42. package/dist/metrics/utils.js.map +1 -0
  43. package/dist/multimodal/multimodal_agent.cjs +7 -13
  44. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  45. package/dist/multimodal/multimodal_agent.d.ts +1 -4
  46. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  47. package/dist/multimodal/multimodal_agent.js +7 -13
  48. package/dist/multimodal/multimodal_agent.js.map +1 -1
  49. package/dist/pipeline/agent_output.cjs +9 -2
  50. package/dist/pipeline/agent_output.cjs.map +1 -1
  51. package/dist/pipeline/agent_output.d.ts +1 -0
  52. package/dist/pipeline/agent_output.d.ts.map +1 -1
  53. package/dist/pipeline/agent_output.js +9 -2
  54. package/dist/pipeline/agent_output.js.map +1 -1
  55. package/dist/pipeline/index.cjs +2 -0
  56. package/dist/pipeline/index.cjs.map +1 -1
  57. package/dist/pipeline/index.d.ts +1 -1
  58. package/dist/pipeline/index.d.ts.map +1 -1
  59. package/dist/pipeline/index.js +3 -1
  60. package/dist/pipeline/index.js.map +1 -1
  61. package/dist/pipeline/pipeline_agent.cjs +168 -70
  62. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  63. package/dist/pipeline/pipeline_agent.d.ts +10 -4
  64. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  65. package/dist/pipeline/pipeline_agent.js +171 -73
  66. package/dist/pipeline/pipeline_agent.js.map +1 -1
  67. package/dist/pipeline/speech_handle.cjs +49 -1
  68. package/dist/pipeline/speech_handle.cjs.map +1 -1
  69. package/dist/pipeline/speech_handle.d.ts +12 -2
  70. package/dist/pipeline/speech_handle.d.ts.map +1 -1
  71. package/dist/pipeline/speech_handle.js +50 -2
  72. package/dist/pipeline/speech_handle.js.map +1 -1
  73. package/dist/stt/index.cjs.map +1 -1
  74. package/dist/stt/index.d.ts +1 -1
  75. package/dist/stt/index.d.ts.map +1 -1
  76. package/dist/stt/index.js.map +1 -1
  77. package/dist/stt/stream_adapter.cjs +15 -5
  78. package/dist/stt/stream_adapter.cjs.map +1 -1
  79. package/dist/stt/stream_adapter.d.ts +4 -1
  80. package/dist/stt/stream_adapter.d.ts.map +1 -1
  81. package/dist/stt/stream_adapter.js +15 -5
  82. package/dist/stt/stream_adapter.js.map +1 -1
  83. package/dist/stt/stt.cjs +46 -2
  84. package/dist/stt/stt.cjs.map +1 -1
  85. package/dist/stt/stt.d.ts +25 -3
  86. package/dist/stt/stt.d.ts.map +1 -1
  87. package/dist/stt/stt.js +46 -2
  88. package/dist/stt/stt.js.map +1 -1
  89. package/dist/tts/index.cjs +4 -2
  90. package/dist/tts/index.cjs.map +1 -1
  91. package/dist/tts/index.d.ts +1 -1
  92. package/dist/tts/index.d.ts.map +1 -1
  93. package/dist/tts/index.js +3 -1
  94. package/dist/tts/index.js.map +1 -1
  95. package/dist/tts/stream_adapter.cjs +14 -3
  96. package/dist/tts/stream_adapter.cjs.map +1 -1
  97. package/dist/tts/stream_adapter.d.ts +3 -0
  98. package/dist/tts/stream_adapter.d.ts.map +1 -1
  99. package/dist/tts/stream_adapter.js +15 -4
  100. package/dist/tts/stream_adapter.js.map +1 -1
  101. package/dist/tts/tts.cjs +109 -6
  102. package/dist/tts/tts.cjs.map +1 -1
  103. package/dist/tts/tts.d.ts +24 -1
  104. package/dist/tts/tts.d.ts.map +1 -1
  105. package/dist/tts/tts.js +107 -5
  106. package/dist/tts/tts.js.map +1 -1
  107. package/dist/vad.cjs +43 -2
  108. package/dist/vad.cjs.map +1 -1
  109. package/dist/vad.d.ts +21 -4
  110. package/dist/vad.d.ts.map +1 -1
  111. package/dist/vad.js +43 -2
  112. package/dist/vad.js.map +1 -1
  113. package/package.json +1 -1
  114. package/src/index.ts +2 -1
  115. package/src/llm/index.ts +2 -0
  116. package/src/llm/llm.ts +55 -3
  117. package/src/metrics/base.ts +127 -0
  118. package/src/metrics/index.ts +20 -0
  119. package/src/metrics/usage_collector.ts +40 -0
  120. package/src/metrics/utils.ts +100 -0
  121. package/src/multimodal/multimodal_agent.ts +12 -17
  122. package/src/pipeline/agent_output.ts +14 -7
  123. package/src/pipeline/index.ts +1 -1
  124. package/src/pipeline/pipeline_agent.ts +210 -95
  125. package/src/pipeline/speech_handle.ts +67 -2
  126. package/src/stt/index.ts +2 -0
  127. package/src/stt/stream_adapter.ts +17 -5
  128. package/src/stt/stt.ts +67 -3
  129. package/src/tts/index.ts +2 -0
  130. package/src/tts/stream_adapter.ts +17 -4
  131. package/src/tts/tts.ts +127 -4
  132. package/src/vad.ts +61 -4
@@ -0,0 +1,100 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { log } from '../log.js';
5
+ import type {
6
+ AgentMetrics,
7
+ LLMMetrics,
8
+ PipelineEOUMetrics,
9
+ PipelineLLMMetrics,
10
+ PipelineTTSMetrics,
11
+ STTMetrics,
12
+ TTSMetrics,
13
+ VADMetrics,
14
+ } from './base.js';
15
+
16
+ export const logMetrics = (metrics: AgentMetrics) => {
17
+ const logger = log();
18
+ if (isPipelineLLMMetrics(metrics)) {
19
+ logger
20
+ .child({
21
+ sequenceId: metrics.sequenceId,
22
+ ttft: metrics.ttft,
23
+ inputTokens: metrics.promptTokens,
24
+ outputTokens: metrics.completionTokens,
25
+ tokensPerSecond: metrics.tokensPerSecond,
26
+ })
27
+ .info('Pipeline LLM metrics');
28
+ } else if (isLLMMetrics(metrics)) {
29
+ logger
30
+ .child({
31
+ ttft: metrics.ttft,
32
+ inputTokens: metrics.promptTokens,
33
+ outputTokens: metrics.completionTokens,
34
+ tokensPerSecond: metrics.tokensPerSecond,
35
+ })
36
+ .info('LLM metrics');
37
+ } else if (isPipelineTTSMetrics(metrics)) {
38
+ logger
39
+ .child({
40
+ sequenceId: metrics.sequenceId,
41
+ ttfb: metrics.ttfb,
42
+ audioDuration: metrics.audioDuration,
43
+ })
44
+ .info('Pipeline TTS metrics');
45
+ } else if (isTTSMetrics(metrics)) {
46
+ logger
47
+ .child({
48
+ ttfb: metrics.ttfb,
49
+ audioDuration: metrics.audioDuration,
50
+ })
51
+ .info('TTS metrics');
52
+ } else if (isPipelineEOUMetrics(metrics)) {
53
+ logger
54
+ .child({
55
+ sequenceId: metrics.sequenceId,
56
+ endOfUtteranceDelay: metrics.endOfUtteranceDelay,
57
+ transcriptionDelay: metrics.transcriptionDelay,
58
+ })
59
+ .info('Pipeline EOU metrics');
60
+ } else if (isSTTMetrics(metrics)) {
61
+ logger
62
+ .child({
63
+ audioDuration: metrics.audioDuration,
64
+ })
65
+ .info('STT metrics');
66
+ }
67
+ };
68
+
69
+ export const isLLMMetrics = (metrics: AgentMetrics): metrics is LLMMetrics => {
70
+ return !!(metrics as LLMMetrics).ttft;
71
+ };
72
+
73
+ export const isPipelineLLMMetrics = (metrics: AgentMetrics): metrics is PipelineLLMMetrics => {
74
+ return isLLMMetrics(metrics) && !!(metrics as PipelineLLMMetrics).sequenceId;
75
+ };
76
+
77
+ export const isVADMetrics = (metrics: AgentMetrics): metrics is VADMetrics => {
78
+ return !!(metrics as VADMetrics).inferenceCount;
79
+ };
80
+
81
+ export const isPipelineEOUMetrics = (metrics: AgentMetrics): metrics is PipelineEOUMetrics => {
82
+ return !!(metrics as PipelineEOUMetrics).endOfUtteranceDelay;
83
+ };
84
+
85
+ export const isTTSMetrics = (metrics: AgentMetrics): metrics is TTSMetrics => {
86
+ return !!(metrics as TTSMetrics).ttfb;
87
+ };
88
+
89
+ export const isPipelineTTSMetrics = (metrics: AgentMetrics): metrics is PipelineTTSMetrics => {
90
+ return isTTSMetrics(metrics) && !!(metrics as PipelineTTSMetrics).sequenceId;
91
+ };
92
+
93
+ export const isSTTMetrics = (metrics: AgentMetrics): metrics is STTMetrics => {
94
+ return !(
95
+ isLLMMetrics(metrics) ||
96
+ isVADMetrics(metrics) ||
97
+ isPipelineEOUMetrics(metrics) ||
98
+ isTTSMetrics(metrics)
99
+ );
100
+ };
@@ -21,6 +21,7 @@ import { EventEmitter } from 'node:events';
21
21
  import { AudioByteStream } from '../audio.js';
22
22
  import * as llm from '../llm/index.js';
23
23
  import { log } from '../log.js';
24
+ import type { MultimodalLLMMetrics } from '../metrics/base.js';
24
25
  import { BasicTranscriptionForwarder } from '../transcription.js';
25
26
  import { findMicroTrackId } from '../utils.js';
26
27
  import { AgentPlayout, type PlayoutHandle } from './agent_playout.js';
@@ -60,7 +61,7 @@ export class MultimodalAgent extends EventEmitter {
60
61
  room: Room | null = null;
61
62
  linkedParticipant: RemoteParticipant | null = null;
62
63
  subscribedTrack: RemoteAudioTrack | null = null;
63
- readMicroTask: { promise: Promise<void>; cancel: () => void } | null = null;
64
+ readMicroTask: Promise<void> | null = null;
64
65
 
65
66
  constructor({
66
67
  model,
@@ -284,6 +285,7 @@ export class MultimodalAgent extends EventEmitter {
284
285
  });
285
286
 
286
287
  this.#session.on('input_speech_started', (ev: any) => {
288
+ this.emit('user_started_speaking');
287
289
  if (this.#playingHandle && !this.#playingHandle.done) {
288
290
  this.#playingHandle.interrupt();
289
291
 
@@ -326,6 +328,10 @@ export class MultimodalAgent extends EventEmitter {
326
328
  this.#updateState();
327
329
  });
328
330
 
331
+ this.#session.on('metrics_collected', (metrics: MultimodalLLMMetrics) => {
332
+ this.emit('metrics_collected', metrics);
333
+ });
334
+
329
335
  resolve(this.#session);
330
336
  });
331
337
  }
@@ -404,22 +410,11 @@ export class MultimodalAgent extends EventEmitter {
404
410
  };
405
411
  this.subscribedTrack = track;
406
412
 
407
- if (this.readMicroTask) {
408
- this.readMicroTask.cancel();
409
- }
410
-
411
- let cancel: () => void;
412
- this.readMicroTask = {
413
- promise: new Promise<void>((resolve, reject) => {
414
- cancel = () => {
415
- reject(new Error('Task cancelled'));
416
- };
417
- readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))
418
- .then(resolve)
419
- .catch(reject);
420
- }),
421
- cancel: () => cancel(),
422
- };
413
+ this.readMicroTask = new Promise<void>((resolve, reject) => {
414
+ readAudioStreamTask(new AudioStream(track, this.model.sampleRate, this.model.numChannels))
415
+ .then(resolve)
416
+ .catch(reject);
417
+ });
423
418
  }
424
419
 
425
420
  #getLocalTrackSid(): string | null {
@@ -13,6 +13,7 @@ export class SynthesisHandle {
13
13
  static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
14
14
 
15
15
  #speechId: string;
16
+ text?: string;
16
17
  ttsSource: SpeechSource;
17
18
  #agentPlayout: AgentPlayout;
18
19
  tts: TTS;
@@ -97,7 +98,7 @@ export class AgentOutput {
97
98
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
98
99
  return new CancellablePromise(async (resolve, _, onCancel) => {
99
100
  const ttsSource = await handle.ttsSource;
100
- let task: CancellablePromise<void>;
101
+ let task: CancellablePromise<string>;
101
102
  if (typeof ttsSource === 'string') {
102
103
  task = stringSynthesisTask(ttsSource, handle);
103
104
  } else {
@@ -113,6 +114,10 @@ export class AgentOutput {
113
114
  } finally {
114
115
  if (handle.intFut.done) {
115
116
  gracefullyCancel(task);
117
+ } else {
118
+ task.then((text) => {
119
+ handle.text = text;
120
+ });
116
121
  }
117
122
  }
118
123
 
@@ -121,9 +126,9 @@ export class AgentOutput {
121
126
  }
122
127
  }
123
128
 
124
- const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<void> => {
129
+ const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {
125
130
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
126
- return new CancellablePromise<void>(async (resolve, _, onCancel) => {
131
+ return new CancellablePromise(async (resolve, _, onCancel) => {
127
132
  let cancelled = false;
128
133
  onCancel(() => {
129
134
  cancelled = true;
@@ -141,16 +146,17 @@ const stringSynthesisTask = (text: string, handle: SynthesisHandle): Cancellable
141
146
  }
142
147
  handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
143
148
 
144
- resolve();
149
+ resolve(text);
145
150
  });
146
151
  };
147
152
 
148
153
  const streamSynthesisTask = (
149
154
  stream: AsyncIterable<string>,
150
155
  handle: SynthesisHandle,
151
- ): CancellablePromise<void> => {
156
+ ): CancellablePromise<string> => {
152
157
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
153
- return new CancellablePromise<void>(async (resolve, _, onCancel) => {
158
+ return new CancellablePromise(async (resolve, _, onCancel) => {
159
+ let fullText = '';
154
160
  let cancelled = false;
155
161
  onCancel(() => {
156
162
  cancelled = true;
@@ -170,12 +176,13 @@ const streamSynthesisTask = (
170
176
  readGeneratedAudio();
171
177
 
172
178
  for await (const text of stream) {
179
+ fullText += text;
173
180
  if (cancelled) break;
174
181
  ttsStream.pushText(text);
175
182
  }
176
183
  ttsStream.flush();
177
184
  ttsStream.endInput();
178
185
 
179
- resolve();
186
+ resolve(fullText);
180
187
  });
181
188
  };
@@ -7,9 +7,9 @@ export {
7
7
  type BeforeTTSCallback,
8
8
  type BeforeLLMCallback,
9
9
  type VPACallbacks,
10
- type AgentCallContext,
11
10
  type AgentTranscriptionOptions,
12
11
  type VPAOptions,
13
12
  VPAEvent,
14
13
  VoicePipelineAgent,
14
+ AgentCallContext,
15
15
  } from './pipeline_agent.js';