@livekit/agents 1.0.35 → 1.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/voice/agent_activity.cjs +19 -19
  2. package/dist/voice/agent_activity.cjs.map +1 -1
  3. package/dist/voice/agent_activity.d.ts.map +1 -1
  4. package/dist/voice/agent_activity.js +19 -19
  5. package/dist/voice/agent_activity.js.map +1 -1
  6. package/dist/voice/agent_session.cjs +64 -25
  7. package/dist/voice/agent_session.cjs.map +1 -1
  8. package/dist/voice/agent_session.d.cts +25 -1
  9. package/dist/voice/agent_session.d.ts +25 -1
  10. package/dist/voice/agent_session.d.ts.map +1 -1
  11. package/dist/voice/agent_session.js +64 -25
  12. package/dist/voice/agent_session.js.map +1 -1
  13. package/dist/voice/index.cjs +14 -1
  14. package/dist/voice/index.cjs.map +1 -1
  15. package/dist/voice/index.d.cts +1 -0
  16. package/dist/voice/index.d.ts +1 -0
  17. package/dist/voice/index.d.ts.map +1 -1
  18. package/dist/voice/index.js +3 -1
  19. package/dist/voice/index.js.map +1 -1
  20. package/dist/voice/room_io/room_io.cjs +1 -0
  21. package/dist/voice/room_io/room_io.cjs.map +1 -1
  22. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  23. package/dist/voice/room_io/room_io.js +1 -0
  24. package/dist/voice/room_io/room_io.js.map +1 -1
  25. package/dist/voice/speech_handle.cjs +12 -3
  26. package/dist/voice/speech_handle.cjs.map +1 -1
  27. package/dist/voice/speech_handle.d.cts +12 -2
  28. package/dist/voice/speech_handle.d.ts +12 -2
  29. package/dist/voice/speech_handle.d.ts.map +1 -1
  30. package/dist/voice/speech_handle.js +10 -2
  31. package/dist/voice/speech_handle.js.map +1 -1
  32. package/dist/voice/testing/index.cjs +52 -0
  33. package/dist/voice/testing/index.cjs.map +1 -0
  34. package/dist/voice/testing/index.d.cts +20 -0
  35. package/dist/voice/testing/index.d.ts +20 -0
  36. package/dist/voice/testing/index.d.ts.map +1 -0
  37. package/dist/voice/testing/index.js +31 -0
  38. package/dist/voice/testing/index.js.map +1 -0
  39. package/dist/voice/testing/run_result.cjs +477 -0
  40. package/dist/voice/testing/run_result.cjs.map +1 -0
  41. package/dist/voice/testing/run_result.d.cts +226 -0
  42. package/dist/voice/testing/run_result.d.ts +226 -0
  43. package/dist/voice/testing/run_result.d.ts.map +1 -0
  44. package/dist/voice/testing/run_result.js +451 -0
  45. package/dist/voice/testing/run_result.js.map +1 -0
  46. package/dist/voice/testing/types.cjs +46 -0
  47. package/dist/voice/testing/types.cjs.map +1 -0
  48. package/dist/voice/testing/types.d.cts +83 -0
  49. package/dist/voice/testing/types.d.ts +83 -0
  50. package/dist/voice/testing/types.d.ts.map +1 -0
  51. package/dist/voice/testing/types.js +19 -0
  52. package/dist/voice/testing/types.js.map +1 -0
  53. package/package.json +3 -3
  54. package/src/voice/agent_activity.ts +24 -22
  55. package/src/voice/agent_session.ts +73 -28
  56. package/src/voice/index.ts +1 -0
  57. package/src/voice/room_io/room_io.ts +1 -0
  58. package/src/voice/speech_handle.ts +24 -4
  59. package/src/voice/testing/index.ts +49 -0
  60. package/src/voice/testing/run_result.ts +576 -0
  61. package/src/voice/testing/types.ts +118 -0
@@ -0,0 +1,83 @@
1
+ import type { AgentHandoffItem, ChatMessage, ChatRole, FunctionCall, FunctionCallOutput } from '../../llm/chat_context.js';
2
+ import type { Agent } from '../agent.js';
3
+ /**
4
+ * Event representing an assistant or user message in the conversation.
5
+ */
6
+ export interface ChatMessageEvent {
7
+ type: 'message';
8
+ item: ChatMessage;
9
+ }
10
+ /**
11
+ * Event representing a function/tool call initiated by the LLM.
12
+ */
13
+ export interface FunctionCallEvent {
14
+ type: 'function_call';
15
+ item: FunctionCall;
16
+ }
17
+ /**
18
+ * Event representing the output/result of a function call.
19
+ */
20
+ export interface FunctionCallOutputEvent {
21
+ type: 'function_call_output';
22
+ item: FunctionCallOutput;
23
+ }
24
+ /**
25
+ * Event representing an agent handoff (switching from one agent to another).
26
+ */
27
+ export interface AgentHandoffEvent {
28
+ type: 'agent_handoff';
29
+ item: AgentHandoffItem;
30
+ oldAgent?: Agent;
31
+ newAgent: Agent;
32
+ }
33
+ /**
34
+ * Union type of all possible run events that can occur during a test run.
35
+ */
36
+ export type RunEvent = ChatMessageEvent | FunctionCallEvent | FunctionCallOutputEvent | AgentHandoffEvent;
37
+ /**
38
+ * Type guard to check if an event is a ChatMessageEvent.
39
+ */
40
+ export declare function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent;
41
+ /**
42
+ * Type guard to check if an event is a FunctionCallEvent.
43
+ */
44
+ export declare function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent;
45
+ /**
46
+ * Type guard to check if an event is a FunctionCallOutputEvent.
47
+ */
48
+ export declare function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent;
49
+ /**
50
+ * Type guard to check if an event is an AgentHandoffEvent.
51
+ */
52
+ export declare function isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent;
53
+ /**
54
+ * Options for message assertion.
55
+ */
56
+ export interface MessageAssertOptions {
57
+ role?: ChatRole;
58
+ }
59
+ /**
60
+ * Options for function call assertion.
61
+ */
62
+ export interface FunctionCallAssertOptions {
63
+ name?: string;
64
+ args?: Record<string, unknown>;
65
+ }
66
+ /**
67
+ * Options for function call output assertion.
68
+ */
69
+ export interface FunctionCallOutputAssertOptions {
70
+ output?: string;
71
+ isError?: boolean;
72
+ }
73
+ /**
74
+ * Options for agent handoff assertion.
75
+ */
76
+ export interface AgentHandoffAssertOptions {
77
+ newAgentType?: new (...args: any[]) => Agent;
78
+ }
79
+ /**
80
+ * Event type literals for type-safe event filtering.
81
+ */
82
+ export type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';
83
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/voice/testing/types.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,gBAAgB,EAChB,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,kBAAkB,EACnB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,WAAW,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,YAAY,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,sBAAsB,CAAC;IAC7B,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,gBAAgB,CAAC;IACvB,QAAQ,CAAC,EAAE,KAAK,CAAC;IACjB,QAAQ,EAAE,KAAK,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,iBAAiB,GACjB,uBAAuB,GACvB,iBAAiB,CAAC;AAEtB;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,gBAAgB,CAE7E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,uBAAuB,CAE3F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,IAAI,CAAC,EAAE,QAAQ,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IAExC,YAAY,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,KAAK,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,eAAe,GAAG,sBAAsB,GAAG,eAAe,CAAC"}
@@ -0,0 +1,19 @@
1
+ function isChatMessageEvent(event) {
2
+ return event.type === "message";
3
+ }
4
+ function isFunctionCallEvent(event) {
5
+ return event.type === "function_call";
6
+ }
7
+ function isFunctionCallOutputEvent(event) {
8
+ return event.type === "function_call_output";
9
+ }
10
+ function isAgentHandoffEvent(event) {
11
+ return event.type === "agent_handoff";
12
+ }
13
+ export {
14
+ isAgentHandoffEvent,
15
+ isChatMessageEvent,
16
+ isFunctionCallEvent,
17
+ isFunctionCallOutputEvent
18
+ };
19
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/voice/testing/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n AgentHandoffItem,\n ChatMessage,\n ChatRole,\n FunctionCall,\n FunctionCallOutput,\n} from '../../llm/chat_context.js';\nimport type { Agent } from '../agent.js';\n\n/**\n * Event representing an assistant or user message in the conversation.\n */\nexport interface ChatMessageEvent {\n type: 'message';\n item: ChatMessage;\n}\n\n/**\n * Event representing a function/tool call initiated by the LLM.\n */\nexport interface FunctionCallEvent {\n type: 'function_call';\n item: FunctionCall;\n}\n\n/**\n * Event representing the output/result of a function call.\n */\nexport interface FunctionCallOutputEvent {\n type: 'function_call_output';\n item: FunctionCallOutput;\n}\n\n/**\n * Event representing an agent handoff (switching from one agent to another).\n */\nexport interface AgentHandoffEvent {\n type: 'agent_handoff';\n item: AgentHandoffItem;\n oldAgent?: Agent;\n newAgent: Agent;\n}\n\n/**\n * Union type of all possible run events that can occur during a test run.\n */\nexport type RunEvent =\n | ChatMessageEvent\n | FunctionCallEvent\n | FunctionCallOutputEvent\n | AgentHandoffEvent;\n\n/**\n * Type guard to check if an event is a ChatMessageEvent.\n */\nexport function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent {\n return event.type === 'message';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallEvent.\n */\nexport function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent {\n return event.type === 'function_call';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallOutputEvent.\n */\nexport function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent {\n return event.type === 'function_call_output';\n}\n\n/**\n * Type guard to check if an event is an AgentHandoffEvent.\n */\nexport function 
isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent {\n return event.type === 'agent_handoff';\n}\n\n/**\n * Options for message assertion.\n */\nexport interface MessageAssertOptions {\n role?: ChatRole;\n}\n\n/**\n * Options for function call assertion.\n */\nexport interface FunctionCallAssertOptions {\n name?: string;\n args?: Record<string, unknown>;\n}\n\n/**\n * Options for function call output assertion.\n */\nexport interface FunctionCallOutputAssertOptions {\n output?: string;\n isError?: boolean;\n}\n\n/**\n * Options for agent handoff assertion.\n */\nexport interface AgentHandoffAssertOptions {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n newAgentType?: new (...args: any[]) => Agent;\n}\n\n/**\n * Event type literals for type-safe event filtering.\n */\nexport type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';\n"],"mappings":"AA0DO,SAAS,mBAAmB,OAA4C;AAC7E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,0BAA0B,OAAmD;AAC3F,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents",
3
- "version": "1.0.35",
3
+ "version": "1.0.36",
4
4
  "description": "LiveKit Agents - Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -26,7 +26,7 @@
26
26
  "README.md"
27
27
  ],
28
28
  "devDependencies": {
29
- "@livekit/rtc-node": "^0.13.22",
29
+ "@livekit/rtc-node": "^0.13.24",
30
30
  "@microsoft/api-extractor": "^7.35.0",
31
31
  "@types/fluent-ffmpeg": "^2.1.28",
32
32
  "@types/json-schema": "^7.0.15",
@@ -70,7 +70,7 @@
70
70
  "zod-to-json-schema": "^3.24.6"
71
71
  },
72
72
  "peerDependencies": {
73
- "@livekit/rtc-node": "^0.13.22",
73
+ "@livekit/rtc-node": "^0.13.24",
74
74
  "zod": "^3.25.76 || ^4.1.8"
75
75
  },
76
76
  "scripts": {
@@ -1350,11 +1350,14 @@ export class AgentActivity implements RecognitionHooks {
1350
1350
  );
1351
1351
  tasks.push(llmTask);
1352
1352
 
1353
- const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
1354
-
1355
1353
  let ttsTask: Task<void> | null = null;
1356
1354
  let ttsStream: ReadableStream<AudioFrame> | null = null;
1355
+ let llmOutput: ReadableStream<string>;
1356
+
1357
1357
  if (audioOutput) {
1358
+ // Only tee the stream when we need TTS
1359
+ const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
1360
+ llmOutput = textOutput;
1358
1361
  [ttsTask, ttsStream] = performTTSInference(
1359
1362
  (...args) => this.agent.ttsNode(...args),
1360
1363
  ttsTextInput,
@@ -1362,6 +1365,9 @@ export class AgentActivity implements RecognitionHooks {
1362
1365
  replyAbortController,
1363
1366
  );
1364
1367
  tasks.push(ttsTask);
1368
+ } else {
1369
+ // No TTS needed, use the stream directly
1370
+ llmOutput = llmGenData.textStream;
1365
1371
  }
1366
1372
 
1367
1373
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
@@ -1421,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
1421
1427
  //TODO(AJS-272): before executing tools, make sure we generated all the text
1422
1428
  // (this ensures everything is kept ordered)
1423
1429
 
1424
- const onToolExecutionStarted = (_: FunctionCall) => {
1425
- // TODO(brian): handle speech_handle item_added
1430
+ const onToolExecutionStarted = (f: FunctionCall) => {
1431
+ speechHandle._itemAdded([f]);
1432
+ this.agent._chatCtx.items.push(f);
1433
+ this.agentSession._toolItemsAdded([f]);
1426
1434
  };
1427
1435
 
1428
- const onToolExecutionCompleted = (_: ToolExecutionOutput) => {
1429
- // TODO(brian): handle speech_handle item_added
1436
+ const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
1437
+ if (out.toolCallOutput) {
1438
+ speechHandle._itemAdded([out.toolCallOutput]);
1439
+ }
1430
1440
  };
1431
1441
 
1432
1442
  const [executeToolsTask, toolOutput] = performToolExecutions({
@@ -1501,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
1501
1511
  });
1502
1512
  chatCtx.insert(message);
1503
1513
  this.agent._chatCtx.insert(message);
1514
+ speechHandle._itemAdded([message]);
1504
1515
  this.agentSession._conversationItemAdded(message);
1505
1516
  }
1506
1517
 
@@ -1528,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
1528
1539
  });
1529
1540
  chatCtx.insert(message);
1530
1541
  this.agent._chatCtx.insert(message);
1542
+ speechHandle._itemAdded([message]);
1531
1543
  this.agentSession._conversationItemAdded(message);
1532
1544
  this.logger.info(
1533
1545
  { speech_id: speechHandle.id, message: textOut.text },
@@ -1612,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
1612
1624
  if (shouldGenerateToolReply) {
1613
1625
  chatCtx.insert(toolMessages);
1614
1626
 
1615
- const handle = SpeechHandle.create({
1616
- allowInterruptions: speechHandle.allowInterruptions,
1617
- stepIndex: speechHandle._stepIndex + 1,
1618
- parent: speechHandle,
1619
- });
1620
- this.agentSession.emit(
1621
- AgentSessionEventTypes.SpeechCreated,
1622
- createSpeechCreatedEvent({
1623
- userInitiated: false,
1624
- source: 'tool_response',
1625
- speechHandle: handle,
1626
- }),
1627
- );
1627
+ // Increment step count on SAME handle (parity with Python agent_activity.py L2081)
1628
+ speechHandle._numSteps += 1;
1628
1629
 
1629
1630
  // Avoid setting tool_choice to "required" or a specific function when
1630
1631
  // passing tool response back to the LLM
1631
1632
  const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';
1632
1633
 
1634
+ // Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
1633
1635
  const toolResponseTask = this.createSpeechTask({
1634
1636
  task: Task.from(() =>
1635
1637
  this.pipelineReplyTask(
1636
- handle,
1638
+ speechHandle,
1637
1639
  chatCtx,
1638
1640
  toolCtx,
1639
1641
  { toolChoice: respondToolChoice },
@@ -1643,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
1643
1645
  toolMessages,
1644
1646
  ),
1645
1647
  ),
1646
- ownedSpeechHandle: handle,
1648
+ ownedSpeechHandle: speechHandle,
1647
1649
  name: 'AgentActivity.pipelineReply',
1648
1650
  });
1649
1651
 
1650
1652
  toolResponseTask.finally(() => this.onPipelineReplyDone());
1651
1653
 
1652
- this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1654
+ this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1653
1655
  } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1654
1656
  for (const msg of toolMessages) {
1655
1657
  msg.createdAt = replyStartedAt;
@@ -61,6 +61,7 @@ import { RecorderIO } from './recorder_io/index.js';
61
61
  import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
62
62
  import type { UnknownUserData } from './run_context.js';
63
63
  import type { SpeechHandle } from './speech_handle.js';
64
+ import { RunResult } from './testing/run_result.js';
64
65
 
65
66
  export interface VoiceOptions {
66
67
  allowInterruptions: boolean;
@@ -167,6 +168,9 @@ export class AgentSession<
167
168
  /** @internal - Timestamp when the session started (milliseconds) */
168
169
  _startedAt?: number;
169
170
 
171
+ /** @internal - Current run state for testing */
172
+ _globalRunState?: RunResult;
173
+
170
174
  constructor(opts: AgentSessionOptions<UserData>) {
171
175
  super();
172
176
 
@@ -272,7 +276,7 @@ export class AgentSession<
272
276
  span,
273
277
  }: {
274
278
  agent: Agent;
275
- room: Room;
279
+ room?: Room;
276
280
  inputOptions?: Partial<RoomInputOptions>;
277
281
  outputOptions?: Partial<RoomOutputOptions>;
278
282
  span: Span;
@@ -283,41 +287,45 @@ export class AgentSession<
283
287
  this._updateAgentState('initializing');
284
288
 
285
289
  const tasks: Promise<void>[] = [];
286
- // Check for existing input/output configuration and warn if needed
287
- if (this.input.audio && inputOptions?.audioEnabled !== false) {
288
- this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
289
- }
290
290
 
291
- if (this.output.audio && outputOptions?.audioEnabled !== false) {
292
- this.logger.warn(
293
- 'RoomIO audio output is enabled but output.audio is already set, ignoring..',
294
- );
295
- }
291
+ if (room && !this.roomIO) {
292
+ // Check for existing input/output configuration and warn if needed
293
+ if (this.input.audio && inputOptions?.audioEnabled !== false) {
294
+ this.logger.warn(
295
+ 'RoomIO audio input is enabled but input.audio is already set, ignoring..',
296
+ );
297
+ }
296
298
 
297
- if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
298
- this.logger.warn(
299
- 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
300
- );
301
- }
299
+ if (this.output.audio && outputOptions?.audioEnabled !== false) {
300
+ this.logger.warn(
301
+ 'RoomIO audio output is enabled but output.audio is already set, ignoring..',
302
+ );
303
+ }
302
304
 
303
- this.roomIO = new RoomIO({
304
- agentSession: this,
305
- room,
306
- inputOptions,
307
- outputOptions,
308
- });
309
- this.roomIO.start();
305
+ if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
306
+ this.logger.warn(
307
+ 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
308
+ );
309
+ }
310
+
311
+ this.roomIO = new RoomIO({
312
+ agentSession: this,
313
+ room,
314
+ inputOptions,
315
+ outputOptions,
316
+ });
317
+ this.roomIO.start();
318
+ }
310
319
 
311
320
  let ctx: JobContext | undefined = undefined;
312
321
  try {
313
322
  ctx = getJobContext();
314
- } catch (error) {
323
+ } catch {
315
324
  // JobContext is not available in evals
316
- this.logger.warn('JobContext is not available');
317
325
  }
318
326
 
319
327
  if (ctx) {
320
- if (ctx.room === room && !room.isConnected) {
328
+ if (room && ctx.room === room && !room.isConnected) {
321
329
  this.logger.debug('Auto-connecting to room via job context');
322
330
  tasks.push(ctx.connect());
323
331
  }
@@ -370,7 +378,7 @@ export class AgentSession<
370
378
  record,
371
379
  }: {
372
380
  agent: Agent;
373
- room: Room;
381
+ room?: Room;
374
382
  inputOptions?: Partial<RoomInputOptions>;
375
383
  outputOptions?: Partial<RoomOutputOptions>;
376
384
  record?: boolean;
@@ -497,13 +505,50 @@ export class AgentSession<
497
505
 
498
506
  // attach to the session span if called outside of the AgentSession
499
507
  const activeSpan = trace.getActiveSpan();
508
+ let handle: SpeechHandle;
500
509
  if (!activeSpan && this.rootSpanContext) {
501
- return otelContext.with(this.rootSpanContext, () =>
510
+ handle = otelContext.with(this.rootSpanContext, () =>
502
511
  doGenerateReply(this.activity!, this.nextActivity),
503
512
  );
513
+ } else {
514
+ handle = doGenerateReply(this.activity!, this.nextActivity);
504
515
  }
505
516
 
506
- return doGenerateReply(this.activity!, this.nextActivity);
517
+ if (this._globalRunState) {
518
+ this._globalRunState._watchHandle(handle);
519
+ }
520
+
521
+ return handle;
522
+ }
523
+
524
+ /**
525
+ * Run a test with user input and return a result for assertions.
526
+ *
527
+ * This method is primarily used for testing agent behavior without
528
+ * requiring a real room connection.
529
+ *
530
+ * @example
531
+ * ```typescript
532
+ * const result = await session.run({ userInput: 'Hello' });
533
+ * result.expect.nextEvent().isMessage({ role: 'assistant' });
534
+ * result.expect.noMoreEvents();
535
+ * ```
536
+ *
537
+ * @param options - Run options including user input
538
+ * @returns A RunResult that resolves when the agent finishes responding
539
+ *
540
+ * TODO: Add outputType parameter for typed outputs (parity with Python)
541
+ */
542
+ run(options: { userInput: string }): RunResult {
543
+ if (this._globalRunState && !this._globalRunState.done()) {
544
+ throw new Error('nested runs are not supported');
545
+ }
546
+
547
+ const runState = new RunResult({ userInput: options.userInput });
548
+ this._globalRunState = runState;
549
+ this.generateReply({ userInput: options.userInput });
550
+
551
+ return runState;
507
552
  }
508
553
 
509
554
  private async updateActivity(agent: Agent): Promise<void> {
@@ -10,3 +10,4 @@ export { type TimedString } from './io.js';
10
10
  export * from './report.js';
11
11
  export * from './room_io/index.js';
12
12
  export { RunContext } from './run_context.js';
13
+ export * as testing from './testing/index.js';
@@ -51,6 +51,7 @@ const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (sess: AgentSession, ev:
51
51
  };
52
52
 
53
53
  const DEFAULT_PARTICIPANT_KINDS: ParticipantKind[] = [
54
+ ParticipantKind.CONNECTOR,
54
55
  ParticipantKind.SIP,
55
56
  ParticipantKind.STANDARD,
56
57
  ];
@@ -2,10 +2,25 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { ChatItem } from '../llm/index.js';
5
- import { Event, Future, shortuuid } from '../utils.js';
6
5
  import type { Task } from '../utils.js';
6
+ import { Event, Future, shortuuid } from '../utils.js';
7
7
  import { asyncLocalStorage } from './agent.js';
8
8
 
9
+ /** Symbol used to identify SpeechHandle instances */
10
+ const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
11
+
12
+ /**
13
+ * Type guard to check if a value is a SpeechHandle.
14
+ */
15
+ export function isSpeechHandle(value: unknown): value is SpeechHandle {
16
+ return (
17
+ typeof value === 'object' &&
18
+ value !== null &&
19
+ SPEECH_HANDLE_SYMBOL in value &&
20
+ (value as Record<symbol, boolean>)[SPEECH_HANDLE_SYMBOL] === true
21
+ );
22
+ }
23
+
9
24
  export class SpeechHandle {
10
25
  /** Priority for messages that should be played after all other messages in the queue */
11
26
  static SPEECH_PRIORITY_LOW = 0;
@@ -18,16 +33,21 @@ export class SpeechHandle {
18
33
  private authorizedEvent = new Event();
19
34
  private scheduledFut = new Future<void>();
20
35
  private doneFut = new Future<void>();
21
-
22
36
  private generations: Future<void>[] = [];
37
+ private _chatItems: ChatItem[] = [];
38
+
23
39
  /** @internal */
24
40
  _tasks: Task<void>[] = [];
25
- private _chatItems: ChatItem[] = [];
26
- private _numSteps = 1;
41
+
42
+ /** @internal */
43
+ _numSteps = 1;
27
44
 
28
45
  private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
29
46
  private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
30
47
 
48
+ /** @internal Symbol marker for type identification */
49
+ readonly [SPEECH_HANDLE_SYMBOL] = true;
50
+
31
51
  constructor(
32
52
  private _id: string,
33
53
  private _allowInterruptions: boolean,
@@ -0,0 +1,49 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ /**
6
+ * Testing utilities for agent evaluation.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * import { AgentSession, Agent, voice } from '@livekit/agents';
11
+ *
12
+ * const session = new AgentSession({ llm });
13
+ * await session.start({ agent });
14
+ *
15
+ * const result = await session.run({ userInput: 'Hello' });
16
+ * result.expect.nextEvent().isMessage({ role: 'assistant' });
17
+ * result.expect.noMoreEvents();
18
+ * ```
19
+ *
20
+ * @packageDocumentation
21
+ */
22
+
23
+ export {
24
+ AgentHandoffAssert,
25
+ AssertionError,
26
+ EventAssert,
27
+ FunctionCallAssert,
28
+ FunctionCallOutputAssert,
29
+ MessageAssert,
30
+ RunAssert,
31
+ RunResult,
32
+ } from './run_result.js';
33
+
34
+ export {
35
+ isAgentHandoffEvent,
36
+ isChatMessageEvent,
37
+ isFunctionCallEvent,
38
+ isFunctionCallOutputEvent,
39
+ type AgentHandoffAssertOptions,
40
+ type AgentHandoffEvent,
41
+ type ChatMessageEvent,
42
+ type EventType,
43
+ type FunctionCallAssertOptions,
44
+ type FunctionCallEvent,
45
+ type FunctionCallOutputAssertOptions,
46
+ type FunctionCallOutputEvent,
47
+ type MessageAssertOptions,
48
+ type RunEvent,
49
+ } from './types.js';