@livekit/agents 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174):
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.d.cts +12 -12
  9. package/dist/inference/api_protos.d.ts +12 -12
  10. package/dist/inference/tts.cjs +1 -1
  11. package/dist/inference/tts.cjs.map +1 -1
  12. package/dist/inference/tts.js +1 -1
  13. package/dist/inference/tts.js.map +1 -1
  14. package/dist/ipc/job_proc_lazy_main.cjs +6 -2
  15. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  16. package/dist/ipc/job_proc_lazy_main.js +6 -2
  17. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  18. package/dist/job.cjs +31 -0
  19. package/dist/job.cjs.map +1 -1
  20. package/dist/job.d.cts +6 -0
  21. package/dist/job.d.ts +6 -0
  22. package/dist/job.d.ts.map +1 -1
  23. package/dist/job.js +31 -0
  24. package/dist/job.js.map +1 -1
  25. package/dist/llm/chat_context.cjs +33 -0
  26. package/dist/llm/chat_context.cjs.map +1 -1
  27. package/dist/llm/chat_context.d.cts +22 -2
  28. package/dist/llm/chat_context.d.ts +22 -2
  29. package/dist/llm/chat_context.d.ts.map +1 -1
  30. package/dist/llm/chat_context.js +32 -0
  31. package/dist/llm/chat_context.js.map +1 -1
  32. package/dist/llm/index.cjs +2 -0
  33. package/dist/llm/index.cjs.map +1 -1
  34. package/dist/llm/index.d.cts +1 -1
  35. package/dist/llm/index.d.ts +1 -1
  36. package/dist/llm/index.d.ts.map +1 -1
  37. package/dist/llm/index.js +2 -0
  38. package/dist/llm/index.js.map +1 -1
  39. package/dist/llm/llm.cjs.map +1 -1
  40. package/dist/llm/llm.d.ts.map +1 -1
  41. package/dist/llm/llm.js.map +1 -1
  42. package/dist/llm/provider_format/google.test.cjs +48 -0
  43. package/dist/llm/provider_format/google.test.cjs.map +1 -1
  44. package/dist/llm/provider_format/google.test.js +54 -1
  45. package/dist/llm/provider_format/google.test.js.map +1 -1
  46. package/dist/llm/provider_format/index.d.cts +1 -1
  47. package/dist/llm/provider_format/index.d.ts +1 -1
  48. package/dist/llm/provider_format/openai.cjs +1 -2
  49. package/dist/llm/provider_format/openai.cjs.map +1 -1
  50. package/dist/llm/provider_format/openai.js +1 -2
  51. package/dist/llm/provider_format/openai.js.map +1 -1
  52. package/dist/llm/provider_format/openai.test.cjs +32 -0
  53. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  54. package/dist/llm/provider_format/openai.test.js +38 -1
  55. package/dist/llm/provider_format/openai.test.js.map +1 -1
  56. package/dist/log.cjs.map +1 -1
  57. package/dist/log.d.ts.map +1 -1
  58. package/dist/log.js.map +1 -1
  59. package/dist/stt/stt.cjs +3 -0
  60. package/dist/stt/stt.cjs.map +1 -1
  61. package/dist/stt/stt.d.cts +1 -0
  62. package/dist/stt/stt.d.ts +1 -0
  63. package/dist/stt/stt.d.ts.map +1 -1
  64. package/dist/stt/stt.js +3 -0
  65. package/dist/stt/stt.js.map +1 -1
  66. package/dist/telemetry/index.cjs +51 -0
  67. package/dist/telemetry/index.cjs.map +1 -0
  68. package/dist/telemetry/index.d.cts +4 -0
  69. package/dist/telemetry/index.d.ts +4 -0
  70. package/dist/telemetry/index.d.ts.map +1 -0
  71. package/dist/telemetry/index.js +12 -0
  72. package/dist/telemetry/index.js.map +1 -0
  73. package/dist/telemetry/trace_types.cjs +191 -0
  74. package/dist/telemetry/trace_types.cjs.map +1 -0
  75. package/dist/telemetry/trace_types.d.cts +56 -0
  76. package/dist/telemetry/trace_types.d.ts +56 -0
  77. package/dist/telemetry/trace_types.d.ts.map +1 -0
  78. package/dist/telemetry/trace_types.js +113 -0
  79. package/dist/telemetry/trace_types.js.map +1 -0
  80. package/dist/telemetry/traces.cjs +196 -0
  81. package/dist/telemetry/traces.cjs.map +1 -0
  82. package/dist/telemetry/traces.d.cts +97 -0
  83. package/dist/telemetry/traces.d.ts +97 -0
  84. package/dist/telemetry/traces.d.ts.map +1 -0
  85. package/dist/telemetry/traces.js +173 -0
  86. package/dist/telemetry/traces.js.map +1 -0
  87. package/dist/telemetry/utils.cjs +86 -0
  88. package/dist/telemetry/utils.cjs.map +1 -0
  89. package/dist/telemetry/utils.d.cts +5 -0
  90. package/dist/telemetry/utils.d.ts +5 -0
  91. package/dist/telemetry/utils.d.ts.map +1 -0
  92. package/dist/telemetry/utils.js +51 -0
  93. package/dist/telemetry/utils.js.map +1 -0
  94. package/dist/tts/tts.cjs +3 -0
  95. package/dist/tts/tts.cjs.map +1 -1
  96. package/dist/tts/tts.d.cts +1 -0
  97. package/dist/tts/tts.d.ts +1 -0
  98. package/dist/tts/tts.d.ts.map +1 -1
  99. package/dist/tts/tts.js +3 -0
  100. package/dist/tts/tts.js.map +1 -1
  101. package/dist/vad.cjs +3 -0
  102. package/dist/vad.cjs.map +1 -1
  103. package/dist/vad.d.cts +1 -0
  104. package/dist/vad.d.ts +1 -0
  105. package/dist/vad.d.ts.map +1 -1
  106. package/dist/vad.js +3 -0
  107. package/dist/vad.js.map +1 -1
  108. package/dist/voice/agent.cjs +15 -0
  109. package/dist/voice/agent.cjs.map +1 -1
  110. package/dist/voice/agent.d.cts +4 -1
  111. package/dist/voice/agent.d.ts +4 -1
  112. package/dist/voice/agent.d.ts.map +1 -1
  113. package/dist/voice/agent.js +15 -0
  114. package/dist/voice/agent.js.map +1 -1
  115. package/dist/voice/agent_activity.cjs +5 -0
  116. package/dist/voice/agent_activity.cjs.map +1 -1
  117. package/dist/voice/agent_activity.d.ts.map +1 -1
  118. package/dist/voice/agent_activity.js +5 -0
  119. package/dist/voice/agent_activity.js.map +1 -1
  120. package/dist/voice/agent_session.cjs +29 -1
  121. package/dist/voice/agent_session.cjs.map +1 -1
  122. package/dist/voice/agent_session.d.cts +6 -2
  123. package/dist/voice/agent_session.d.ts +6 -2
  124. package/dist/voice/agent_session.d.ts.map +1 -1
  125. package/dist/voice/agent_session.js +30 -2
  126. package/dist/voice/agent_session.js.map +1 -1
  127. package/dist/voice/audio_recognition.cjs +1 -1
  128. package/dist/voice/audio_recognition.cjs.map +1 -1
  129. package/dist/voice/audio_recognition.d.ts.map +1 -1
  130. package/dist/voice/audio_recognition.js +1 -1
  131. package/dist/voice/audio_recognition.js.map +1 -1
  132. package/dist/voice/generation.cjs.map +1 -1
  133. package/dist/voice/generation.d.ts.map +1 -1
  134. package/dist/voice/generation.js.map +1 -1
  135. package/dist/voice/index.cjs +2 -0
  136. package/dist/voice/index.cjs.map +1 -1
  137. package/dist/voice/index.d.cts +1 -0
  138. package/dist/voice/index.d.ts +1 -0
  139. package/dist/voice/index.d.ts.map +1 -1
  140. package/dist/voice/index.js +1 -0
  141. package/dist/voice/index.js.map +1 -1
  142. package/dist/voice/report.cjs +69 -0
  143. package/dist/voice/report.cjs.map +1 -0
  144. package/dist/voice/report.d.cts +26 -0
  145. package/dist/voice/report.d.ts +26 -0
  146. package/dist/voice/report.d.ts.map +1 -0
  147. package/dist/voice/report.js +44 -0
  148. package/dist/voice/report.js.map +1 -0
  149. package/package.json +10 -3
  150. package/src/index.ts +2 -1
  151. package/src/inference/tts.ts +1 -1
  152. package/src/ipc/job_proc_lazy_main.ts +10 -2
  153. package/src/job.ts +48 -0
  154. package/src/llm/chat_context.ts +53 -1
  155. package/src/llm/index.ts +1 -0
  156. package/src/llm/llm.ts +2 -0
  157. package/src/llm/provider_format/google.test.ts +72 -1
  158. package/src/llm/provider_format/openai.test.ts +55 -1
  159. package/src/llm/provider_format/openai.ts +3 -2
  160. package/src/log.ts +1 -0
  161. package/src/stt/stt.ts +4 -0
  162. package/src/telemetry/index.ts +10 -0
  163. package/src/telemetry/trace_types.ts +88 -0
  164. package/src/telemetry/traces.ts +266 -0
  165. package/src/telemetry/utils.ts +61 -0
  166. package/src/tts/tts.ts +8 -0
  167. package/src/vad.ts +4 -0
  168. package/src/voice/agent.ts +22 -0
  169. package/src/voice/agent_activity.ts +9 -0
  170. package/src/voice/agent_session.ts +44 -1
  171. package/src/voice/audio_recognition.ts +3 -1
  172. package/src/voice/generation.ts +3 -0
  173. package/src/voice/index.ts +1 -0
  174. package/src/voice/report.ts +77 -0
@@ -14,7 +14,7 @@ import {
14
14
  type TTSModelString,
15
15
  } from '../inference/index.js';
16
16
  import { getJobContext } from '../job.js';
17
- import { ChatContext, ChatMessage } from '../llm/chat_context.js';
17
+ import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
18
18
  import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
19
19
  import type { LLMError } from '../llm/llm.js';
20
20
  import { log } from '../log.js';
@@ -26,6 +26,7 @@ import type { Agent } from './agent.js';
26
26
  import { AgentActivity } from './agent_activity.js';
27
27
  import type { _TurnDetector } from './audio_recognition.js';
28
28
  import {
29
+ type AgentEvent,
29
30
  AgentSessionEventTypes,
30
31
  type AgentState,
31
32
  type AgentStateChangedEvent,
@@ -127,6 +128,9 @@ export class AgentSession<
127
128
  private closingTask: Promise<void> | null = null;
128
129
  private userAwayTimer: NodeJS.Timeout | null = null;
129
130
 
131
+ /** @internal */
132
+ _recordedEvents: AgentEvent[] = [];
133
+
130
134
  constructor(opts: AgentSessionOptions<UserData>) {
131
135
  super();
132
136
 
@@ -174,6 +178,15 @@ export class AgentSession<
174
178
  this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
175
179
  }
176
180
 
181
+ emit<K extends keyof AgentSessionCallbacks>(
182
+ event: K,
183
+ ...args: Parameters<AgentSessionCallbacks[K]>
184
+ ): boolean {
185
+ const eventData = args[0] as AgentEvent;
186
+ this._recordedEvents.push(eventData);
187
+ return super.emit(event, ...args);
188
+ }
189
+
177
190
  get input(): AgentInput {
178
191
  return this._input;
179
192
  }
@@ -199,15 +212,20 @@ export class AgentSession<
199
212
  }
200
213
 
201
214
  async start({
215
+ // TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
216
+ // TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
217
+ // TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
202
218
  agent,
203
219
  room,
204
220
  inputOptions,
205
221
  outputOptions,
222
+ record = true,
206
223
  }: {
207
224
  agent: Agent;
208
225
  room: Room;
209
226
  inputOptions?: Partial<RoomInputOptions>;
210
227
  outputOptions?: Partial<RoomOutputOptions>;
228
+ record?: boolean;
211
229
  }): Promise<void> {
212
230
  if (this.started) {
213
231
  return;
@@ -247,6 +265,17 @@ export class AgentSession<
247
265
  this.logger.debug('Auto-connecting to room via job context');
248
266
  tasks.push(ctx.connect());
249
267
  }
268
+
269
+ if (record) {
270
+ if (ctx._primaryAgentSession === undefined) {
271
+ ctx._primaryAgentSession = this;
272
+ } else {
273
+ throw new Error(
274
+ 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',
275
+ );
276
+ }
277
+ }
278
+
250
279
  // TODO(AJS-265): add shutdown callback to job context
251
280
  tasks.push(this.updateActivity(this.agent));
252
281
 
@@ -341,6 +370,8 @@ export class AgentSession<
341
370
  // TODO(AJS-129): add lock to agent activity core lifecycle
342
371
  this.nextActivity = new AgentActivity(agent, this);
343
372
 
373
+ const previousActivity = this.activity;
374
+
344
375
  if (this.activity) {
345
376
  await this.activity.drain();
346
377
  await this.activity.close();
@@ -349,6 +380,14 @@ export class AgentSession<
349
380
  this.activity = this.nextActivity;
350
381
  this.nextActivity = undefined;
351
382
 
383
+ this._chatCtx.insert(
384
+ new AgentHandoffItem({
385
+ oldAgentId: previousActivity?.agent.id,
386
+ newAgentId: agent.id,
387
+ }),
388
+ );
389
+ this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
390
+
352
391
  await this.activity.start();
353
392
 
354
393
  if (this._input.audio) {
@@ -419,6 +458,8 @@ export class AgentSession<
419
458
  return;
420
459
  }
421
460
 
461
+ // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._agentSpeakingSpan, create tracer.startSpan('agent_speaking') with participant attributes
462
+ // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._agentSpeakingSpan, end and clear this._agentSpeakingSpan
422
463
  const oldState = this._agentState;
423
464
  this._agentState = state;
424
465
 
@@ -441,6 +482,8 @@ export class AgentSession<
441
482
  return;
442
483
  }
443
484
 
485
+ // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._userSpeakingSpan, create tracer.startSpan('user_speaking') with participant attributes
486
+ // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._userSpeakingSpan, end and clear this._userSpeakingSpan
444
487
  const oldState = this.userState;
445
488
  this.userState = state;
446
489
 
@@ -57,6 +57,8 @@ export interface AudioRecognitionOptions {
57
57
  maxEndpointingDelay: number;
58
58
  }
59
59
 
60
+ // TODO(brian): PR3 - Add span: private _userTurnSpan?: Span, create lazily in _ensureUserTurnSpan() method (tracer.startSpan('user_turn') with participant attributes)
61
+ // TODO(brian): PR3 - Add span: 'eou_detection' span when running EOU detection (in runEOUDetection method)
60
62
  export class AudioRecognition {
61
63
  private hooks: RecognitionHooks;
62
64
  private stt?: STTNode;
@@ -356,7 +358,7 @@ export class AudioRecognition {
356
358
 
357
359
  if (turnDetector) {
358
360
  this.logger.debug('Running turn detector model');
359
- if (!turnDetector.supportsLanguage(this.lastLanguage)) {
361
+ if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {
360
362
  this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);
361
363
  } else {
362
364
  const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);
@@ -377,6 +377,7 @@ export function updateInstructions(options: {
377
377
  }
378
378
  }
379
379
 
380
+ // TODO(brian): PR3 - Add @tracer.startActiveSpan('llm_node') decorator/wrapper
380
381
  export function performLLMInference(
381
382
  node: LLMNode,
382
383
  chatCtx: ChatContext,
@@ -467,6 +468,7 @@ export function performLLMInference(
467
468
  ];
468
469
  }
469
470
 
471
+ // TODO(brian): PR3 - Add @tracer.startActiveSpan('tts_node') decorator/wrapper
470
472
  export function performTTSInference(
471
473
  node: TTSNode,
472
474
  text: ReadableStream<string>,
@@ -650,6 +652,7 @@ export function performAudioForwarding(
650
652
  ];
651
653
  }
652
654
 
655
+ // TODO(brian): PR3 - Add @tracer.startActiveSpan('function_tool') wrapper for each tool execution
653
656
  export function performToolExecutions({
654
657
  session,
655
658
  speechHandle,
@@ -6,5 +6,6 @@ export { AgentSession, type AgentSessionOptions } from './agent_session.js';
6
6
  export * from './avatar/index.js';
7
7
  export * from './background_audio.js';
8
8
  export * from './events.js';
9
+ export * from './report.js';
9
10
  export * from './room_io/index.js';
10
11
  export { RunContext } from './run_context.js';
@@ -0,0 +1,77 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { ChatContext } from '../llm/chat_context.js';
5
+ import type { VoiceOptions } from './agent_session.js';
6
+ import type { AgentEvent } from './events.js';
7
+
8
+ export interface SessionReport {
9
+ jobId: string;
10
+ roomId: string;
11
+ room: string;
12
+ options: VoiceOptions;
13
+ events: AgentEvent[];
14
+ chatHistory: ChatContext;
15
+ enableUserDataTraining: boolean;
16
+ timestamp: number;
17
+ }
18
+
19
+ export interface SessionReportOptions {
20
+ jobId: string;
21
+ roomId: string;
22
+ room: string;
23
+ options: VoiceOptions;
24
+ events: AgentEvent[];
25
+ chatHistory: ChatContext;
26
+ enableUserDataTraining?: boolean;
27
+ timestamp?: number;
28
+ }
29
+
30
+ export function createSessionReport(opts: SessionReportOptions): SessionReport {
31
+ return {
32
+ jobId: opts.jobId,
33
+ roomId: opts.roomId,
34
+ room: opts.room,
35
+ options: opts.options,
36
+ events: opts.events,
37
+ chatHistory: opts.chatHistory,
38
+ enableUserDataTraining: opts.enableUserDataTraining ?? false,
39
+ timestamp: opts.timestamp ?? Date.now(),
40
+ };
41
+ }
42
+
43
+ // TODO(brian): PR5 - Add uploadSessionReport() function that creates multipart form with:
44
+ // - header: protobuf MetricsRecordingHeader (room_id, duration, start_time)
45
+ // - chat_history: JSON serialized chat history (use sessionReportToJSON)
46
+ // - audio: audio recording file if available (ogg format)
47
+ // - Uploads to LiveKit Cloud observability endpoint with JWT auth
48
+ export function sessionReportToJSON(report: SessionReport): Record<string, unknown> {
49
+ const events: Record<string, unknown>[] = [];
50
+
51
+ for (const event of report.events) {
52
+ if (event.type === 'metrics_collected') {
53
+ continue; // metrics are too noisy, Cloud is using the chat_history as the source of truth
54
+ }
55
+
56
+ events.push({ ...event });
57
+ }
58
+
59
+ return {
60
+ job_id: report.jobId,
61
+ room_id: report.roomId,
62
+ room: report.room,
63
+ events,
64
+ options: {
65
+ allow_interruptions: report.options.allowInterruptions,
66
+ discard_audio_if_uninterruptible: report.options.discardAudioIfUninterruptible,
67
+ min_interruption_duration: report.options.minInterruptionDuration,
68
+ min_interruption_words: report.options.minInterruptionWords,
69
+ min_endpointing_delay: report.options.minEndpointingDelay,
70
+ max_endpointing_delay: report.options.maxEndpointingDelay,
71
+ max_tool_steps: report.options.maxToolSteps,
72
+ },
73
+ chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
74
+ enable_user_data_training: report.enableUserDataTraining,
75
+ timestamp: report.timestamp,
76
+ };
77
+ }