@livekit/agents 1.0.34 → 1.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. package/dist/cli.cjs.map +1 -1
  2. package/dist/inference/api_protos.d.cts +4 -4
  3. package/dist/inference/api_protos.d.ts +4 -4
  4. package/dist/inference/llm.cjs +30 -3
  5. package/dist/inference/llm.cjs.map +1 -1
  6. package/dist/inference/llm.d.cts +3 -1
  7. package/dist/inference/llm.d.ts +3 -1
  8. package/dist/inference/llm.d.ts.map +1 -1
  9. package/dist/inference/llm.js +30 -3
  10. package/dist/inference/llm.js.map +1 -1
  11. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  12. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  13. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  14. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  15. package/dist/ipc/job_proc_lazy_main.js +1 -1
  16. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  17. package/dist/llm/chat_context.cjs +20 -2
  18. package/dist/llm/chat_context.cjs.map +1 -1
  19. package/dist/llm/chat_context.d.cts +9 -0
  20. package/dist/llm/chat_context.d.ts +9 -0
  21. package/dist/llm/chat_context.d.ts.map +1 -1
  22. package/dist/llm/chat_context.js +20 -2
  23. package/dist/llm/chat_context.js.map +1 -1
  24. package/dist/llm/llm.cjs.map +1 -1
  25. package/dist/llm/llm.d.cts +1 -0
  26. package/dist/llm/llm.d.ts +1 -0
  27. package/dist/llm/llm.d.ts.map +1 -1
  28. package/dist/llm/llm.js.map +1 -1
  29. package/dist/llm/provider_format/openai.cjs +43 -20
  30. package/dist/llm/provider_format/openai.cjs.map +1 -1
  31. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  32. package/dist/llm/provider_format/openai.js +43 -20
  33. package/dist/llm/provider_format/openai.js.map +1 -1
  34. package/dist/llm/provider_format/openai.test.cjs +35 -0
  35. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  36. package/dist/llm/provider_format/openai.test.js +35 -0
  37. package/dist/llm/provider_format/openai.test.js.map +1 -1
  38. package/dist/llm/provider_format/utils.cjs +1 -1
  39. package/dist/llm/provider_format/utils.cjs.map +1 -1
  40. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  41. package/dist/llm/provider_format/utils.js +1 -1
  42. package/dist/llm/provider_format/utils.js.map +1 -1
  43. package/dist/voice/agent_activity.cjs +19 -19
  44. package/dist/voice/agent_activity.cjs.map +1 -1
  45. package/dist/voice/agent_activity.d.ts.map +1 -1
  46. package/dist/voice/agent_activity.js +19 -19
  47. package/dist/voice/agent_activity.js.map +1 -1
  48. package/dist/voice/agent_session.cjs +64 -25
  49. package/dist/voice/agent_session.cjs.map +1 -1
  50. package/dist/voice/agent_session.d.cts +25 -1
  51. package/dist/voice/agent_session.d.ts +25 -1
  52. package/dist/voice/agent_session.d.ts.map +1 -1
  53. package/dist/voice/agent_session.js +64 -25
  54. package/dist/voice/agent_session.js.map +1 -1
  55. package/dist/voice/background_audio.cjs.map +1 -1
  56. package/dist/voice/generation.cjs +2 -1
  57. package/dist/voice/generation.cjs.map +1 -1
  58. package/dist/voice/generation.d.ts.map +1 -1
  59. package/dist/voice/generation.js +2 -1
  60. package/dist/voice/generation.js.map +1 -1
  61. package/dist/voice/index.cjs +14 -1
  62. package/dist/voice/index.cjs.map +1 -1
  63. package/dist/voice/index.d.cts +1 -0
  64. package/dist/voice/index.d.ts +1 -0
  65. package/dist/voice/index.d.ts.map +1 -1
  66. package/dist/voice/index.js +3 -1
  67. package/dist/voice/index.js.map +1 -1
  68. package/dist/voice/room_io/room_io.cjs +1 -0
  69. package/dist/voice/room_io/room_io.cjs.map +1 -1
  70. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  71. package/dist/voice/room_io/room_io.js +1 -0
  72. package/dist/voice/room_io/room_io.js.map +1 -1
  73. package/dist/voice/speech_handle.cjs +12 -3
  74. package/dist/voice/speech_handle.cjs.map +1 -1
  75. package/dist/voice/speech_handle.d.cts +12 -2
  76. package/dist/voice/speech_handle.d.ts +12 -2
  77. package/dist/voice/speech_handle.d.ts.map +1 -1
  78. package/dist/voice/speech_handle.js +10 -2
  79. package/dist/voice/speech_handle.js.map +1 -1
  80. package/dist/voice/testing/index.cjs +52 -0
  81. package/dist/voice/testing/index.cjs.map +1 -0
  82. package/dist/voice/testing/index.d.cts +20 -0
  83. package/dist/voice/testing/index.d.ts +20 -0
  84. package/dist/voice/testing/index.d.ts.map +1 -0
  85. package/dist/voice/testing/index.js +31 -0
  86. package/dist/voice/testing/index.js.map +1 -0
  87. package/dist/voice/testing/run_result.cjs +477 -0
  88. package/dist/voice/testing/run_result.cjs.map +1 -0
  89. package/dist/voice/testing/run_result.d.cts +226 -0
  90. package/dist/voice/testing/run_result.d.ts +226 -0
  91. package/dist/voice/testing/run_result.d.ts.map +1 -0
  92. package/dist/voice/testing/run_result.js +451 -0
  93. package/dist/voice/testing/run_result.js.map +1 -0
  94. package/dist/voice/testing/types.cjs +46 -0
  95. package/dist/voice/testing/types.cjs.map +1 -0
  96. package/dist/voice/testing/types.d.cts +83 -0
  97. package/dist/voice/testing/types.d.ts +83 -0
  98. package/dist/voice/testing/types.d.ts.map +1 -0
  99. package/dist/voice/testing/types.js +19 -0
  100. package/dist/voice/testing/types.js.map +1 -0
  101. package/package.json +3 -3
  102. package/src/inference/llm.ts +42 -3
  103. package/src/ipc/job_proc_lazy_main.ts +1 -1
  104. package/src/llm/chat_context.ts +32 -2
  105. package/src/llm/llm.ts +1 -0
  106. package/src/llm/provider_format/openai.test.ts +40 -0
  107. package/src/llm/provider_format/openai.ts +46 -19
  108. package/src/llm/provider_format/utils.ts +5 -1
  109. package/src/voice/agent_activity.ts +24 -22
  110. package/src/voice/agent_session.ts +73 -28
  111. package/src/voice/generation.ts +1 -0
  112. package/src/voice/index.ts +1 -0
  113. package/src/voice/room_io/room_io.ts +1 -0
  114. package/src/voice/speech_handle.ts +24 -4
  115. package/src/voice/testing/index.ts +49 -0
  116. package/src/voice/testing/run_result.ts +576 -0
  117. package/src/voice/testing/types.ts +118 -0
@@ -61,6 +61,7 @@ import { RecorderIO } from './recorder_io/index.js';
61
61
  import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
62
62
  import type { UnknownUserData } from './run_context.js';
63
63
  import type { SpeechHandle } from './speech_handle.js';
64
+ import { RunResult } from './testing/run_result.js';
64
65
 
65
66
  export interface VoiceOptions {
66
67
  allowInterruptions: boolean;
@@ -167,6 +168,9 @@ export class AgentSession<
167
168
  /** @internal - Timestamp when the session started (milliseconds) */
168
169
  _startedAt?: number;
169
170
 
171
+ /** @internal - Current run state for testing */
172
+ _globalRunState?: RunResult;
173
+
170
174
  constructor(opts: AgentSessionOptions<UserData>) {
171
175
  super();
172
176
 
@@ -272,7 +276,7 @@ export class AgentSession<
272
276
  span,
273
277
  }: {
274
278
  agent: Agent;
275
- room: Room;
279
+ room?: Room;
276
280
  inputOptions?: Partial<RoomInputOptions>;
277
281
  outputOptions?: Partial<RoomOutputOptions>;
278
282
  span: Span;
@@ -283,41 +287,45 @@ export class AgentSession<
283
287
  this._updateAgentState('initializing');
284
288
 
285
289
  const tasks: Promise<void>[] = [];
286
- // Check for existing input/output configuration and warn if needed
287
- if (this.input.audio && inputOptions?.audioEnabled !== false) {
288
- this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
289
- }
290
290
 
291
- if (this.output.audio && outputOptions?.audioEnabled !== false) {
292
- this.logger.warn(
293
- 'RoomIO audio output is enabled but output.audio is already set, ignoring..',
294
- );
295
- }
291
+ if (room && !this.roomIO) {
292
+ // Check for existing input/output configuration and warn if needed
293
+ if (this.input.audio && inputOptions?.audioEnabled !== false) {
294
+ this.logger.warn(
295
+ 'RoomIO audio input is enabled but input.audio is already set, ignoring..',
296
+ );
297
+ }
296
298
 
297
- if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
298
- this.logger.warn(
299
- 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
300
- );
301
- }
299
+ if (this.output.audio && outputOptions?.audioEnabled !== false) {
300
+ this.logger.warn(
301
+ 'RoomIO audio output is enabled but output.audio is already set, ignoring..',
302
+ );
303
+ }
302
304
 
303
- this.roomIO = new RoomIO({
304
- agentSession: this,
305
- room,
306
- inputOptions,
307
- outputOptions,
308
- });
309
- this.roomIO.start();
305
+ if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
306
+ this.logger.warn(
307
+ 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
308
+ );
309
+ }
310
+
311
+ this.roomIO = new RoomIO({
312
+ agentSession: this,
313
+ room,
314
+ inputOptions,
315
+ outputOptions,
316
+ });
317
+ this.roomIO.start();
318
+ }
310
319
 
311
320
  let ctx: JobContext | undefined = undefined;
312
321
  try {
313
322
  ctx = getJobContext();
314
- } catch (error) {
323
+ } catch {
315
324
  // JobContext is not available in evals
316
- this.logger.warn('JobContext is not available');
317
325
  }
318
326
 
319
327
  if (ctx) {
320
- if (ctx.room === room && !room.isConnected) {
328
+ if (room && ctx.room === room && !room.isConnected) {
321
329
  this.logger.debug('Auto-connecting to room via job context');
322
330
  tasks.push(ctx.connect());
323
331
  }
@@ -370,7 +378,7 @@ export class AgentSession<
370
378
  record,
371
379
  }: {
372
380
  agent: Agent;
373
- room: Room;
381
+ room?: Room;
374
382
  inputOptions?: Partial<RoomInputOptions>;
375
383
  outputOptions?: Partial<RoomOutputOptions>;
376
384
  record?: boolean;
@@ -497,13 +505,50 @@ export class AgentSession<
497
505
 
498
506
  // attach to the session span if called outside of the AgentSession
499
507
  const activeSpan = trace.getActiveSpan();
508
+ let handle: SpeechHandle;
500
509
  if (!activeSpan && this.rootSpanContext) {
501
- return otelContext.with(this.rootSpanContext, () =>
510
+ handle = otelContext.with(this.rootSpanContext, () =>
502
511
  doGenerateReply(this.activity!, this.nextActivity),
503
512
  );
513
+ } else {
514
+ handle = doGenerateReply(this.activity!, this.nextActivity);
504
515
  }
505
516
 
506
- return doGenerateReply(this.activity!, this.nextActivity);
517
+ if (this._globalRunState) {
518
+ this._globalRunState._watchHandle(handle);
519
+ }
520
+
521
+ return handle;
522
+ }
523
+
524
+ /**
525
+ * Run a test with user input and return a result for assertions.
526
+ *
527
+ * This method is primarily used for testing agent behavior without
528
+ * requiring a real room connection.
529
+ *
530
+ * @example
531
+ * ```typescript
532
+ * const result = await session.run({ userInput: 'Hello' });
533
+ * result.expect.nextEvent().isMessage({ role: 'assistant' });
534
+ * result.expect.noMoreEvents();
535
+ * ```
536
+ *
537
+ * @param options - Run options including user input
538
+ * @returns A RunResult that resolves when the agent finishes responding
539
+ *
540
+ * TODO: Add outputType parameter for typed outputs (parity with Python)
541
+ */
542
+ run(options: { userInput: string }): RunResult {
543
+ if (this._globalRunState && !this._globalRunState.done()) {
544
+ throw new Error('nested runs are not supported');
545
+ }
546
+
547
+ const runState = new RunResult({ userInput: options.userInput });
548
+ this._globalRunState = runState;
549
+ this.generateReply({ userInput: options.userInput });
550
+
551
+ return runState;
507
552
  }
508
553
 
509
554
  private async updateActivity(agent: Agent): Promise<void> {
@@ -444,6 +444,7 @@ export function performLLMInference(
444
444
  args: tool.args,
445
445
  // Preserve thought signature for Gemini 3+ thinking mode
446
446
  thoughtSignature: tool.thoughtSignature,
447
+ extra: tool.extra || {},
447
448
  });
448
449
 
449
450
  data.generatedToolCalls.push(toolCall);
@@ -10,3 +10,4 @@ export { type TimedString } from './io.js';
10
10
  export * from './report.js';
11
11
  export * from './room_io/index.js';
12
12
  export { RunContext } from './run_context.js';
13
+ export * as testing from './testing/index.js';
@@ -51,6 +51,7 @@ const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (sess: AgentSession, ev:
51
51
  };
52
52
 
53
53
  const DEFAULT_PARTICIPANT_KINDS: ParticipantKind[] = [
54
+ ParticipantKind.CONNECTOR,
54
55
  ParticipantKind.SIP,
55
56
  ParticipantKind.STANDARD,
56
57
  ];
@@ -2,10 +2,25 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { ChatItem } from '../llm/index.js';
5
- import { Event, Future, shortuuid } from '../utils.js';
6
5
  import type { Task } from '../utils.js';
6
+ import { Event, Future, shortuuid } from '../utils.js';
7
7
  import { asyncLocalStorage } from './agent.js';
8
8
 
9
+ /** Symbol used to identify SpeechHandle instances */
10
+ const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
11
+
12
+ /**
13
+ * Type guard to check if a value is a SpeechHandle.
14
+ */
15
+ export function isSpeechHandle(value: unknown): value is SpeechHandle {
16
+ return (
17
+ typeof value === 'object' &&
18
+ value !== null &&
19
+ SPEECH_HANDLE_SYMBOL in value &&
20
+ (value as Record<symbol, boolean>)[SPEECH_HANDLE_SYMBOL] === true
21
+ );
22
+ }
23
+
9
24
  export class SpeechHandle {
10
25
  /** Priority for messages that should be played after all other messages in the queue */
11
26
  static SPEECH_PRIORITY_LOW = 0;
@@ -18,16 +33,21 @@ export class SpeechHandle {
18
33
  private authorizedEvent = new Event();
19
34
  private scheduledFut = new Future<void>();
20
35
  private doneFut = new Future<void>();
21
-
22
36
  private generations: Future<void>[] = [];
37
+ private _chatItems: ChatItem[] = [];
38
+
23
39
  /** @internal */
24
40
  _tasks: Task<void>[] = [];
25
- private _chatItems: ChatItem[] = [];
26
- private _numSteps = 1;
41
+
42
+ /** @internal */
43
+ _numSteps = 1;
27
44
 
28
45
  private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
29
46
  private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
30
47
 
48
+ /** @internal Symbol marker for type identification */
49
+ readonly [SPEECH_HANDLE_SYMBOL] = true;
50
+
31
51
  constructor(
32
52
  private _id: string,
33
53
  private _allowInterruptions: boolean,
@@ -0,0 +1,49 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ /**
6
+ * Testing utilities for agent evaluation.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * import { AgentSession, Agent, voice } from '@livekit/agents';
11
+ *
12
+ * const session = new AgentSession({ llm });
13
+ * await session.start(agent);
14
+ *
15
+ * const result = await session.run({ userInput: 'Hello' });
16
+ * result.expect.nextEvent().isMessage({ role: 'assistant' });
17
+ * result.expect.noMoreEvents();
18
+ * ```
19
+ *
20
+ * @packageDocumentation
21
+ */
22
+
23
+ export {
24
+ AgentHandoffAssert,
25
+ AssertionError,
26
+ EventAssert,
27
+ FunctionCallAssert,
28
+ FunctionCallOutputAssert,
29
+ MessageAssert,
30
+ RunAssert,
31
+ RunResult,
32
+ } from './run_result.js';
33
+
34
+ export {
35
+ isAgentHandoffEvent,
36
+ isChatMessageEvent,
37
+ isFunctionCallEvent,
38
+ isFunctionCallOutputEvent,
39
+ type AgentHandoffAssertOptions,
40
+ type AgentHandoffEvent,
41
+ type ChatMessageEvent,
42
+ type EventType,
43
+ type FunctionCallAssertOptions,
44
+ type FunctionCallEvent,
45
+ type FunctionCallOutputAssertOptions,
46
+ type FunctionCallOutputEvent,
47
+ type MessageAssertOptions,
48
+ type RunEvent,
49
+ } from './types.js';