@livekit/agents 1.0.36-dev.0 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/dist/index.cjs +1 -3
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +0 -1
  4. package/dist/index.d.ts +0 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +0 -1
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/utils.cjs +2 -15
  9. package/dist/inference/utils.cjs.map +1 -1
  10. package/dist/inference/utils.d.cts +0 -1
  11. package/dist/inference/utils.d.ts +0 -1
  12. package/dist/inference/utils.d.ts.map +1 -1
  13. package/dist/inference/utils.js +1 -13
  14. package/dist/inference/utils.js.map +1 -1
  15. package/dist/stream/stream_channel.cjs +0 -3
  16. package/dist/stream/stream_channel.cjs.map +1 -1
  17. package/dist/stream/stream_channel.d.cts +2 -3
  18. package/dist/stream/stream_channel.d.ts +2 -3
  19. package/dist/stream/stream_channel.d.ts.map +1 -1
  20. package/dist/stream/stream_channel.js +0 -3
  21. package/dist/stream/stream_channel.js.map +1 -1
  22. package/dist/telemetry/trace_types.cjs +0 -15
  23. package/dist/telemetry/trace_types.cjs.map +1 -1
  24. package/dist/telemetry/trace_types.d.cts +0 -5
  25. package/dist/telemetry/trace_types.d.ts +0 -5
  26. package/dist/telemetry/trace_types.d.ts.map +1 -1
  27. package/dist/telemetry/trace_types.js +0 -10
  28. package/dist/telemetry/trace_types.js.map +1 -1
  29. package/dist/voice/agent_activity.cjs +19 -68
  30. package/dist/voice/agent_activity.cjs.map +1 -1
  31. package/dist/voice/agent_activity.d.cts +0 -14
  32. package/dist/voice/agent_activity.d.ts +0 -14
  33. package/dist/voice/agent_activity.d.ts.map +1 -1
  34. package/dist/voice/agent_activity.js +19 -68
  35. package/dist/voice/agent_activity.js.map +1 -1
  36. package/dist/voice/agent_session.cjs +65 -37
  37. package/dist/voice/agent_session.cjs.map +1 -1
  38. package/dist/voice/agent_session.d.cts +25 -4
  39. package/dist/voice/agent_session.d.ts +25 -4
  40. package/dist/voice/agent_session.d.ts.map +1 -1
  41. package/dist/voice/agent_session.js +65 -37
  42. package/dist/voice/agent_session.js.map +1 -1
  43. package/dist/voice/audio_recognition.cjs +2 -124
  44. package/dist/voice/audio_recognition.cjs.map +1 -1
  45. package/dist/voice/audio_recognition.d.cts +1 -32
  46. package/dist/voice/audio_recognition.d.ts +1 -32
  47. package/dist/voice/audio_recognition.d.ts.map +1 -1
  48. package/dist/voice/audio_recognition.js +2 -127
  49. package/dist/voice/audio_recognition.js.map +1 -1
  50. package/dist/voice/index.cjs +14 -1
  51. package/dist/voice/index.cjs.map +1 -1
  52. package/dist/voice/index.d.cts +1 -0
  53. package/dist/voice/index.d.ts +1 -0
  54. package/dist/voice/index.d.ts.map +1 -1
  55. package/dist/voice/index.js +3 -1
  56. package/dist/voice/index.js.map +1 -1
  57. package/dist/voice/room_io/room_io.cjs +1 -0
  58. package/dist/voice/room_io/room_io.cjs.map +1 -1
  59. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  60. package/dist/voice/room_io/room_io.js +1 -0
  61. package/dist/voice/room_io/room_io.js.map +1 -1
  62. package/dist/voice/speech_handle.cjs +12 -3
  63. package/dist/voice/speech_handle.cjs.map +1 -1
  64. package/dist/voice/speech_handle.d.cts +12 -2
  65. package/dist/voice/speech_handle.d.ts +12 -2
  66. package/dist/voice/speech_handle.d.ts.map +1 -1
  67. package/dist/voice/speech_handle.js +10 -2
  68. package/dist/voice/speech_handle.js.map +1 -1
  69. package/dist/voice/testing/index.cjs +54 -0
  70. package/dist/voice/testing/index.cjs.map +1 -0
  71. package/dist/voice/testing/index.d.cts +20 -0
  72. package/dist/voice/testing/index.d.ts +20 -0
  73. package/dist/voice/testing/index.d.ts.map +1 -0
  74. package/dist/voice/testing/index.js +33 -0
  75. package/dist/voice/testing/index.js.map +1 -0
  76. package/dist/voice/testing/run_result.cjs +766 -0
  77. package/dist/voice/testing/run_result.cjs.map +1 -0
  78. package/dist/voice/testing/run_result.d.cts +374 -0
  79. package/dist/voice/testing/run_result.d.ts +374 -0
  80. package/dist/voice/testing/run_result.d.ts.map +1 -0
  81. package/dist/voice/testing/run_result.js +739 -0
  82. package/dist/voice/testing/run_result.js.map +1 -0
  83. package/dist/{inference/interruption/index.cjs → voice/testing/types.cjs} +24 -12
  84. package/dist/voice/testing/types.cjs.map +1 -0
  85. package/dist/voice/testing/types.d.cts +83 -0
  86. package/dist/voice/testing/types.d.ts +83 -0
  87. package/dist/voice/testing/types.d.ts.map +1 -0
  88. package/dist/voice/testing/types.js +19 -0
  89. package/dist/voice/testing/types.js.map +1 -0
  90. package/package.json +3 -4
  91. package/src/index.ts +0 -2
  92. package/src/inference/utils.ts +0 -15
  93. package/src/stream/stream_channel.ts +2 -6
  94. package/src/telemetry/trace_types.ts +0 -7
  95. package/src/voice/agent_activity.ts +24 -83
  96. package/src/voice/agent_session.ts +74 -49
  97. package/src/voice/audio_recognition.ts +1 -161
  98. package/src/voice/index.ts +1 -0
  99. package/src/voice/room_io/room_io.ts +1 -0
  100. package/src/voice/speech_handle.ts +24 -4
  101. package/src/voice/testing/index.ts +50 -0
  102. package/src/voice/testing/run_result.ts +937 -0
  103. package/src/voice/testing/types.ts +118 -0
  104. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +0 -152
  105. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +0 -1
  106. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +0 -50
  107. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +0 -50
  108. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +0 -1
  109. package/dist/inference/interruption/AdaptiveInterruptionDetector.js +0 -125
  110. package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +0 -1
  111. package/dist/inference/interruption/InterruptionStream.cjs +0 -310
  112. package/dist/inference/interruption/InterruptionStream.cjs.map +0 -1
  113. package/dist/inference/interruption/InterruptionStream.d.cts +0 -57
  114. package/dist/inference/interruption/InterruptionStream.d.ts +0 -57
  115. package/dist/inference/interruption/InterruptionStream.d.ts.map +0 -1
  116. package/dist/inference/interruption/InterruptionStream.js +0 -288
  117. package/dist/inference/interruption/InterruptionStream.js.map +0 -1
  118. package/dist/inference/interruption/defaults.cjs +0 -76
  119. package/dist/inference/interruption/defaults.cjs.map +0 -1
  120. package/dist/inference/interruption/defaults.d.cts +0 -14
  121. package/dist/inference/interruption/defaults.d.ts +0 -14
  122. package/dist/inference/interruption/defaults.d.ts.map +0 -1
  123. package/dist/inference/interruption/defaults.js +0 -42
  124. package/dist/inference/interruption/defaults.js.map +0 -1
  125. package/dist/inference/interruption/errors.cjs +0 -2
  126. package/dist/inference/interruption/errors.cjs.map +0 -1
  127. package/dist/inference/interruption/errors.d.cts +0 -2
  128. package/dist/inference/interruption/errors.d.ts +0 -2
  129. package/dist/inference/interruption/errors.d.ts.map +0 -1
  130. package/dist/inference/interruption/errors.js +0 -1
  131. package/dist/inference/interruption/errors.js.map +0 -1
  132. package/dist/inference/interruption/http_transport.cjs +0 -57
  133. package/dist/inference/interruption/http_transport.cjs.map +0 -1
  134. package/dist/inference/interruption/http_transport.d.cts +0 -23
  135. package/dist/inference/interruption/http_transport.d.ts +0 -23
  136. package/dist/inference/interruption/http_transport.d.ts.map +0 -1
  137. package/dist/inference/interruption/http_transport.js +0 -33
  138. package/dist/inference/interruption/http_transport.js.map +0 -1
  139. package/dist/inference/interruption/index.cjs.map +0 -1
  140. package/dist/inference/interruption/index.d.cts +0 -5
  141. package/dist/inference/interruption/index.d.ts +0 -5
  142. package/dist/inference/interruption/index.d.ts.map +0 -1
  143. package/dist/inference/interruption/index.js +0 -7
  144. package/dist/inference/interruption/index.js.map +0 -1
  145. package/dist/inference/interruption/interruption.cjs +0 -85
  146. package/dist/inference/interruption/interruption.cjs.map +0 -1
  147. package/dist/inference/interruption/interruption.d.cts +0 -48
  148. package/dist/inference/interruption/interruption.d.ts +0 -48
  149. package/dist/inference/interruption/interruption.d.ts.map +0 -1
  150. package/dist/inference/interruption/interruption.js +0 -59
  151. package/dist/inference/interruption/interruption.js.map +0 -1
  152. package/dist/inference/utils.test.cjs +0 -20
  153. package/dist/inference/utils.test.cjs.map +0 -1
  154. package/dist/inference/utils.test.js +0 -19
  155. package/dist/inference/utils.test.js.map +0 -1
  156. package/dist/utils/ws_transport.cjs +0 -51
  157. package/dist/utils/ws_transport.cjs.map +0 -1
  158. package/dist/utils/ws_transport.d.cts +0 -9
  159. package/dist/utils/ws_transport.d.ts +0 -9
  160. package/dist/utils/ws_transport.d.ts.map +0 -1
  161. package/dist/utils/ws_transport.js +0 -17
  162. package/dist/utils/ws_transport.js.map +0 -1
  163. package/dist/utils/ws_transport.test.cjs +0 -212
  164. package/dist/utils/ws_transport.test.cjs.map +0 -1
  165. package/dist/utils/ws_transport.test.js +0 -211
  166. package/dist/utils/ws_transport.test.js.map +0 -1
  167. package/src/inference/interruption/AdaptiveInterruptionDetector.ts +0 -166
  168. package/src/inference/interruption/InterruptionStream.ts +0 -397
  169. package/src/inference/interruption/defaults.ts +0 -33
  170. package/src/inference/interruption/errors.ts +0 -0
  171. package/src/inference/interruption/http_transport.ts +0 -61
  172. package/src/inference/interruption/index.ts +0 -4
  173. package/src/inference/interruption/interruption.ts +0 -88
  174. package/src/inference/utils.test.ts +0 -31
  175. package/src/utils/ws_transport.test.ts +0 -282
  176. package/src/utils/ws_transport.ts +0 -22
@@ -0,0 +1,937 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { z } from 'zod';
5
+ import type { AgentHandoffItem, ChatItem, ChatRole } from '../../llm/chat_context.js';
6
+ import { ChatContext } from '../../llm/chat_context.js';
7
+ import type { LLM } from '../../llm/llm.js';
8
+ import { tool } from '../../llm/tool_context.js';
9
+ import type { Task } from '../../utils.js';
10
+ import { Future } from '../../utils.js';
11
+ import type { Agent } from '../agent.js';
12
+ import { type SpeechHandle, isSpeechHandle } from '../speech_handle.js';
13
+ import {
14
+ type AgentHandoffAssertOptions,
15
+ type AgentHandoffEvent,
16
+ type ChatMessageEvent,
17
+ type EventType,
18
+ type FunctionCallAssertOptions,
19
+ type FunctionCallEvent,
20
+ type FunctionCallOutputAssertOptions,
21
+ type FunctionCallOutputEvent,
22
+ type MessageAssertOptions,
23
+ type RunEvent,
24
+ isAgentHandoffEvent,
25
+ isChatMessageEvent,
26
+ isFunctionCallEvent,
27
+ isFunctionCallOutputEvent,
28
+ } from './types.js';
29
+
30
+ // Constructor signature accepted wherever an assertion matches an agent by class (e.g. `newAgentType`).
31
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
32
+ type AgentConstructor = new (...args: any[]) => Agent;
33
+
34
+ // Verbosity flag parsed from LIVEKIT_EVALS_VERBOSE (treated as '0' when unset or empty); a non-zero value makes `RunResult.expect` log the recorded events.
35
+ const evalsVerbose = parseInt(process.env.LIVEKIT_EVALS_VERBOSE || '0', 10);
36
+
/**
 * Result of a test run containing recorded events and assertion utilities.
 *
 * Events are kept in chronological order (by each item's `createdAt`). The run is
 * marked done when a watched handle completes and every handle still being watched
 * reports done.
 *
 * @example
 * ```typescript
 * const result = await session.run({ userInput: 'Hello' });
 * result.expect.nextEvent().isMessage({ role: 'assistant' });
 * result.expect.noMoreEvents();
 * ```
 */
export class RunResult<T = unknown> {
  private _events: RunEvent[] = [];
  private doneFut = new Future<void>();
  private userInput?: string;

  private handles: Set<SpeechHandle | Task<void>> = new Set();
  private lastSpeechHandle?: SpeechHandle;
  private runAssert?: RunAssert;

  // Single stable function reference used for BOTH registration and removal on speech
  // handles. `this._itemAdded.bind(this)` returns a new function on every call, so
  // binding separately in _watchHandle and _unwatchHandle could never remove the
  // callback that had been added.
  private readonly itemAddedCallback = (item: ChatItem): void => {
    this._itemAdded(item);
  };

  // TODO(brian): Add typed output support for parity with Python
  // - Add outputType?: new (...args: unknown[]) => T
  // - Add finalOutput?: T
  // - Implement markDone() to extract final_output from SpeechHandle.maybeRunFinalOutput
  // - See Python: run_result.py lines 182-201

  /**
   * @param options - Optional settings; `userInput` is only used for verbose logging.
   */
  constructor(options?: { userInput?: string }) {
    this.userInput = options?.userInput;
  }

  /**
   * List of all recorded events generated during the run.
   *
   * This is the live internal array, not a copy.
   */
  get events(): RunEvent[] {
    return this._events;
  }

  /**
   * Provides an assertion helper for verifying the run events.
   *
   * The same `RunAssert` instance is returned on every access so that its cursor
   * position persists. When `LIVEKIT_EVALS_VERBOSE` is non-zero, the recorded events
   * are logged on each access.
   */
  get expect(): RunAssert {
    if (evalsVerbose) {
      const eventsStr = formatEvents(this._events)
        .map((line) => ` ${line}`)
        .join('\n');
      console.log(
        `\n+ RunResult {\n userInput: "${this.userInput}"\n events: [\n${eventsStr}\n ]\n }`,
      );
    }

    // Cache the RunAssert so cursor position persists across multiple .expect accesses
    if (!this.runAssert) {
      this.runAssert = new RunAssert(this);
    }
    return this.runAssert;
  }

  /**
   * Returns the final output of the run after completion.
   *
   * @throws Error - Not implemented yet.
   */
  get finalOutput(): T {
    // TODO(brian): Implement typed output support after AgentTask is implemented.
    throw new Error('finalOutput is not yet implemented in JS.');
  }

  /**
   * Indicates whether the run has finished processing all events.
   */
  done(): boolean {
    return this.doneFut.done;
  }

  /**
   * Wait for the RunResult to complete. Returns `this` for method chaining.
   *
   * @example
   * ```ts
   * const result = session.run({ userInput: 'Hi!' });
   * await result.wait(); // waits for completion
   * result.expect.nextEvent().isMessage({ role: 'assistant' });
   * ```
   */
  async wait(): Promise<this> {
    await this.doneFut.await;
    return this;
  }

  /**
   * @internal
   * Records an agent handoff event at its chronological position.
   */
  _agentHandoff(params: { item: AgentHandoffItem; oldAgent?: Agent; newAgent: Agent }): void {
    const event: AgentHandoffEvent = {
      type: 'agent_handoff',
      item: params.item,
      oldAgent: params.oldAgent,
      newAgent: params.newAgent,
    };
    const index = this._findInsertionIndex(event.item.createdAt);
    this._events.splice(index, 0, event);
  }

  /**
   * @internal
   * Called when a chat item is added during the run.
   *
   * Items arriving after the run is done are ignored, as are item types other than
   * `message`, `function_call` and `function_call_output`.
   */
  _itemAdded(item: ChatItem): void {
    if (this.doneFut.done) {
      return;
    }

    let event: RunEvent | undefined;

    if (item.type === 'message') {
      event = { type: 'message', item };
    } else if (item.type === 'function_call') {
      event = { type: 'function_call', item };
    } else if (item.type === 'function_call_output') {
      event = { type: 'function_call_output', item };
    }

    if (event) {
      const index = this._findInsertionIndex(item.createdAt);
      this._events.splice(index, 0, event);
    }
  }

  /**
   * @internal
   * Watch a speech handle or task for completion.
   *
   * Speech handles additionally report the chat items they add to this result.
   */
  _watchHandle(handle: SpeechHandle | Task<void>): void {
    this.handles.add(handle);

    if (isSpeechHandle(handle)) {
      handle._addItemAddedCallback(this.itemAddedCallback);
    }

    handle.addDoneCallback(() => {
      this._markDoneIfNeeded(handle);
    });
  }

  /**
   * @internal
   * Unwatch a handle and, for speech handles, detach the item-added callback that
   * `_watchHandle` registered.
   */
  _unwatchHandle(handle: SpeechHandle | Task<void>): void {
    this.handles.delete(handle);

    if (isSpeechHandle(handle)) {
      handle._removeItemAddedCallback(this.itemAddedCallback);
    }
  }

  /**
   * Invoked when a watched handle completes; resolves the run once every handle that
   * is still being watched reports done.
   */
  private _markDoneIfNeeded(handle: SpeechHandle | Task<void>): void {
    if (isSpeechHandle(handle)) {
      this.lastSpeechHandle = handle;
    }

    // SpeechHandle exposes done() as a method, Task exposes done as a property.
    if ([...this.handles].every((h) => (isSpeechHandle(h) ? h.done() : h.done))) {
      this._markDone();
    }
  }

  /** Resolves the completion future exactly once. */
  private _markDone(): void {
    // TODO(brian): Implement final output support after AgentTask is implemented.
    // See Python run_result.py _mark_done() for reference:
    // - Check lastSpeechHandle._maybeRunFinalOutput
    // - Validate output type matches expected type
    // - Set exception or resolve based on output
    if (!this.doneFut.done) {
      this.doneFut.resolve();
    }
  }

  /**
   * Find the correct insertion index to maintain chronological order.
   *
   * Scans from the end and inserts after the last event whose `createdAt` is less
   * than or equal to the given timestamp, so events with equal timestamps keep their
   * arrival order.
   */
  private _findInsertionIndex(createdAt: number): number {
    for (let i = this._events.length - 1; i >= 0; i--) {
      if (this._events[i]!.item.createdAt <= createdAt) {
        return i + 1;
      }
    }
    return 0;
  }
}
226
+
/**
 * Assertion helper for verifying run events in sequence.
 *
 * Maintains a cursor into the run's event list: `nextEvent()`, `skipNext()` and a
 * successful `skipNextEventIf()` advance it, while `at()`, `range()` and the
 * `contains*()` helpers inspect events without moving it.
 */
export class RunAssert {
  private _events: RunEvent[];
  private _currentIndex = 0;

  constructor(runResult: RunResult) {
    this._events = runResult.events;
  }

  /**
   * Access a specific event by index for assertions.
   * Supports negative indices (e.g., -1 for last event).
   *
   * @throws AssertionError if the index is out of range.
   *
   * @example
   * ```typescript
   * result.expect.at(0).isMessage({ role: 'user' });
   * result.expect.at(-1).isMessage({ role: 'assistant' });
   * ```
   */
  at(index: number): EventAssert {
    const normalizedIndex = index < 0 ? this._events.length + index : index;

    if (normalizedIndex < 0 || normalizedIndex >= this._events.length) {
      this._raiseWithDebugInfo(
        `at(${index}) out of range (total events: ${this._events.length})`,
        normalizedIndex,
      );
    }

    return new EventAssert(this._events[normalizedIndex]!, this, normalizedIndex);
  }

  /**
   * Advance to the next event, optionally filtering by type.
   *
   * With a `type` filter, non-matching events are consumed until a match is found.
   *
   * @throws AssertionError if the events run out before one is returned.
   *
   * @example
   * ```typescript
   * result.expect.nextEvent().isMessage({ role: 'assistant' });
   * result.expect.nextEvent({ type: 'function_call' }).isFunctionCall({ name: 'foo' });
   * ```
   */
  nextEvent(options?: { type?: EventType }): EventAssert {
    for (;;) {
      // _currentEvent() throws once the cursor runs past the last event, ending the loop.
      const evAssert = this._currentEvent();
      this._currentIndex++;

      if (!options?.type || evAssert.event().type === options.type) {
        return evAssert;
      }
    }
  }

  /**
   * Skip a specified number of upcoming events without assertions.
   *
   * @throws AssertionError if fewer than `count` events remain.
   *
   * @example
   * ```typescript
   * result.expect.skipNext(2);
   * ```
   */
  skipNext(count: number = 1): this {
    for (let i = 0; i < count; i++) {
      if (this._currentIndex >= this._events.length) {
        this._raiseWithDebugInfo(`Tried to skip ${count} event(s), but only ${i} were available.`);
      }
      this._currentIndex++;
    }
    return this;
  }

  /**
   * Conditionally skip the next event if it matches the specified criteria.
   * Returns the event assertion if matched and skipped, or undefined if not matched.
   * The cursor only advances on a match.
   *
   * @example
   * ```typescript
   * // Skip optional assistant message before function call
   * result.expect.skipNextEventIf({ type: 'message', role: 'assistant' });
   * result.expect.nextEvent().isFunctionCall({ name: 'foo' });
   * ```
   */
  skipNextEventIf(
    options:
      | { type: 'message'; role?: ChatRole }
      | { type: 'function_call'; name?: string; args?: Record<string, unknown> }
      | { type: 'function_call_output'; output?: string; isError?: boolean }
      | { type: 'agent_handoff'; newAgentType?: AgentConstructor },
  ):
    | MessageAssert
    | FunctionCallAssert
    | FunctionCallOutputAssert
    | AgentHandoffAssert
    | undefined {
    if (this._currentIndex >= this._events.length) {
      return undefined;
    }

    let matched:
      | MessageAssert
      | FunctionCallAssert
      | FunctionCallOutputAssert
      | AgentHandoffAssert
      | undefined;

    try {
      const evAssert = this._currentEvent();

      switch (options.type) {
        case 'message': {
          const { role } = options;
          matched = evAssert.isMessage({ role });
          break;
        }
        case 'function_call': {
          const { name, args } = options;
          matched = evAssert.isFunctionCall({ name, args });
          break;
        }
        case 'function_call_output': {
          const { output, isError } = options;
          matched = evAssert.isFunctionCallOutput({ output, isError });
          break;
        }
        case 'agent_handoff': {
          const { newAgentType } = options;
          matched = evAssert.isAgentHandoff({ newAgentType });
          break;
        }
      }
    } catch {
      // Assertion failed, event doesn't match criteria
      return undefined;
    }

    if (matched) {
      this._currentIndex++;
    }
    return matched;
  }

  /**
   * Get an EventRangeAssert for a range of events.
   * Similar to Python's slice access: expect[0:3] or expect[:]
   *
   * Bounds follow `Array.prototype.slice` semantics: negative values count from the
   * end and out-of-range values are clamped. The bounds handed to the returned
   * `EventRangeAssert` are the normalized absolute positions, so event indices and
   * the `[start:end]` text in failure messages refer to real positions in the list.
   *
   * @param start - Start index (inclusive), defaults to 0
   * @param end - End index (exclusive), defaults to events.length
   *
   * @example
   * ```typescript
   * // Search all events
   * result.expect.range().containsFunctionCall({ name: 'foo' });
   * // Search first 3 events
   * result.expect.range(0, 3).containsMessage({ role: 'assistant' });
   * ```
   */
  range(start?: number, end?: number): EventRangeAssert {
    const total = this._events.length;
    // Mirrors how Array.prototype.slice interprets a bound.
    const toAbsolute = (bound: number): number => {
      const whole = Number.isNaN(bound) ? 0 : Math.trunc(bound);
      return whole < 0 ? Math.max(total + whole, 0) : Math.min(whole, total);
    };

    const startIdx = toAbsolute(start ?? 0);
    const endIdx = toAbsolute(end ?? total);
    const events = this._events.slice(startIdx, endIdx);
    return new EventRangeAssert(events, this, { start: startIdx, end: endIdx });
  }

  /**
   * Assert that a function call matching criteria exists anywhere in the events.
   *
   * @example
   * ```typescript
   * result.expect.containsFunctionCall({ name: 'order_item' });
   * ```
   */
  containsFunctionCall(options?: FunctionCallAssertOptions): FunctionCallAssert {
    return this.range().containsFunctionCall(options);
  }

  /**
   * Assert that a message matching criteria exists anywhere in the events.
   *
   * @example
   * ```typescript
   * result.expect.containsMessage({ role: 'assistant' });
   * ```
   */
  containsMessage(options?: MessageAssertOptions): MessageAssert {
    return this.range().containsMessage(options);
  }

  /**
   * Assert that a function call output matching criteria exists anywhere in the events.
   *
   * @example
   * ```typescript
   * result.expect.containsFunctionCallOutput({ isError: false });
   * ```
   */
  containsFunctionCallOutput(options?: FunctionCallOutputAssertOptions): FunctionCallOutputAssert {
    return this.range().containsFunctionCallOutput(options);
  }

  /**
   * Assert that an agent handoff matching criteria exists anywhere in the events.
   *
   * @example
   * ```typescript
   * result.expect.containsAgentHandoff({ newAgentType: MyAgent });
   * ```
   */
  containsAgentHandoff(options?: AgentHandoffAssertOptions): AgentHandoffAssert {
    return this.range().containsAgentHandoff(options);
  }

  /**
   * Assert that there are no further events.
   *
   * @throws AssertionError if the cursor has not reached the end of the event list.
   *
   * @example
   * ```typescript
   * result.expect.noMoreEvents();
   * ```
   */
  noMoreEvents(): void {
    if (this._currentIndex < this._events.length) {
      const event = this._events[this._currentIndex]!;
      this._raiseWithDebugInfo(`Expected no more events, but found: ${event.type}`);
    }
  }

  /** Returns an assertion wrapper for the event under the cursor, or throws if none is left. */
  private _currentEvent(): EventAssert {
    if (this._currentIndex >= this._events.length) {
      this._raiseWithDebugInfo('Expected another event, but none left.');
    }
    return this.at(this._currentIndex);
  }

  /**
   * @internal
   * Throws an AssertionError whose message includes the formatted event list.
   *
   * @param message - Description of the failed expectation
   * @param index - Event position to mark in the output; defaults to the cursor position
   */
  _raiseWithDebugInfo(message: string, index?: number): never {
    const markerIndex = index ?? this._currentIndex;
    const eventsStr = formatEvents(this._events, markerIndex).join('\n');
    throw new AssertionError(`${message}\nContext around failure:\n${eventsStr}`);
  }
}
466
+
/**
 * Structural equality for JSON-style values (primitives, arrays, plain objects).
 * Used to compare expected function-call arguments with the parsed actual arguments.
 */
function isJsonDeepEqual(left: unknown, right: unknown): boolean {
  if (left === right) {
    return true;
  }
  if (typeof left !== 'object' || typeof right !== 'object' || left === null || right === null) {
    return false;
  }

  if (Array.isArray(left) || Array.isArray(right)) {
    if (!Array.isArray(left) || !Array.isArray(right) || left.length !== right.length) {
      return false;
    }
    return left.every((entry, i) => isJsonDeepEqual(entry, right[i]));
  }

  const leftRecord = left as Record<string, unknown>;
  const rightRecord = right as Record<string, unknown>;
  const leftKeys = Object.keys(leftRecord);
  if (leftKeys.length !== Object.keys(rightRecord).length) {
    return false;
  }
  return leftKeys.every(
    (key) =>
      Object.prototype.hasOwnProperty.call(rightRecord, key) &&
      isJsonDeepEqual(leftRecord[key], rightRecord[key]),
  );
}

/**
 * Assertion wrapper for a single event.
 *
 * Each `is*()` method checks the event's type (and any optional criteria), throws
 * through the parent `RunAssert` on mismatch, and otherwise returns a more specific
 * assertion wrapper for the same event.
 */
export class EventAssert {
  protected _event: RunEvent;
  protected _parent: RunAssert;
  protected _index: number;

  /**
   * @param event - The event being asserted on
   * @param parent - The RunAssert used to raise failures with debug context
   * @param index - Position of the event, passed along when raising failures
   */
  constructor(event: RunEvent, parent: RunAssert, index: number) {
    this._event = event;
    this._parent = parent;
    this._index = index;
  }

  /**
   * Get the underlying event.
   */
  event(): RunEvent {
    return this._event;
  }

  /** Raises an assertion failure for this event via the parent RunAssert. */
  protected _raise(message: string): never {
    this._parent._raiseWithDebugInfo(message, this._index);
  }

  /**
   * Verify this event is a message with optional role matching.
   *
   * @example
   * ```typescript
   * result.expect.nextEvent().isMessage({ role: 'assistant' });
   * ```
   */
  isMessage(options?: MessageAssertOptions): MessageAssert {
    if (!isChatMessageEvent(this._event)) {
      this._raise(`Expected ChatMessageEvent, got ${this._event.type}`);
    }

    if (options?.role && this._event.item.role !== options.role) {
      this._raise(`Expected role '${options.role}', got '${this._event.item.role}'`);
    }

    return new MessageAssert(this._event, this._parent, this._index);
  }

  /**
   * Verify this event is a function call with optional name/args matching.
   *
   * `args` is a partial match: every listed key must be present in the call's parsed
   * JSON arguments with a structurally equal value (nested objects and arrays are
   * compared by content); keys not listed are ignored.
   *
   * @example
   * ```typescript
   * result.expect.nextEvent().isFunctionCall({ name: 'order_item', args: { id: 'big_mac' } });
   * ```
   */
  isFunctionCall(options?: FunctionCallAssertOptions): FunctionCallAssert {
    if (!isFunctionCallEvent(this._event)) {
      this._raise(`Expected FunctionCallEvent, got ${this._event.type}`);
    }

    if (options?.name && this._event.item.name !== options.name) {
      this._raise(`Expected call name '${options.name}', got '${this._event.item.name}'`);
    }

    if (options?.args) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(this._event.item.args);
      } catch {
        this._raise(`Failed to parse function call arguments: ${this._event.item.args}`);
      }

      // Valid JSON is not necessarily an object (e.g. `null` or `42`); the `in` operator
      // below would throw a TypeError on those instead of a readable assertion failure.
      if (typeof parsed !== 'object' || parsed === null) {
        this._raise(
          `Expected function call arguments to be a JSON object, got: ${this._event.item.args}`,
        );
      }
      const actual = parsed as Record<string, unknown>;

      for (const [key, value] of Object.entries(options.args)) {
        // Compare by structure: `!==` would reject every object/array argument because
        // freshly parsed values are never reference-equal to the expected ones.
        if (!(key in actual) || !isJsonDeepEqual(actual[key], value)) {
          this._raise(
            `For key '${key}', expected ${JSON.stringify(value)}, got ${JSON.stringify(actual[key])}`,
          );
        }
      }
    }

    return new FunctionCallAssert(this._event, this._parent, this._index);
  }

  /**
   * Verify this event is a function call output with optional matching.
   *
   * `output` and `isError` are only checked when they are not `undefined`.
   *
   * @example
   * ```typescript
   * result.expect.nextEvent().isFunctionCallOutput({ isError: false });
   * ```
   */
  isFunctionCallOutput(options?: FunctionCallOutputAssertOptions): FunctionCallOutputAssert {
    if (!isFunctionCallOutputEvent(this._event)) {
      this._raise(`Expected FunctionCallOutputEvent, got ${this._event.type}`);
    }

    if (options?.output !== undefined && this._event.item.output !== options.output) {
      this._raise(`Expected output '${options.output}', got '${this._event.item.output}'`);
    }

    if (options?.isError !== undefined && this._event.item.isError !== options.isError) {
      this._raise(`Expected isError=${options.isError}, got ${this._event.item.isError}`);
    }

    return new FunctionCallOutputAssert(this._event, this._parent, this._index);
  }

  /**
   * Verify this event is an agent handoff with optional type matching.
   *
   * When `newAgentType` is given, the new agent must be an `instanceof` that class
   * (subclasses therefore match as well).
   *
   * @example
   * ```typescript
   * result.expect.nextEvent().isAgentHandoff({ newAgentType: MyAgent });
   * ```
   */
  isAgentHandoff(options?: AgentHandoffAssertOptions): AgentHandoffAssert {
    if (!isAgentHandoffEvent(this._event)) {
      this._raise(`Expected AgentHandoffEvent, got ${this._event.type}`);
    }

    const event = this._event;

    if (options?.newAgentType) {
      const actualType = event.newAgent.constructor.name;
      if (!(event.newAgent instanceof options.newAgentType)) {
        this._raise(`Expected new_agent '${options.newAgentType.name}', got '${actualType}'`);
      }
    }

    return new AgentHandoffAssert(event, this._parent, this._index);
  }
}
598
+
599
+ /**
600
+ * Assertion wrapper for a range of events.
601
+ * Provides contains*() methods to search within the range.
602
+ */
603
+ export class EventRangeAssert {
604
+ private _events: RunEvent[];
605
+ private _parent: RunAssert;
606
+ private _range: { start: number; end: number };
607
+
608
+ constructor(events: RunEvent[], parent: RunAssert, range: { start: number; end: number }) {
609
+ this._events = events;
610
+ this._parent = parent;
611
+ this._range = range;
612
+ }
613
+
614
+ /**
615
+ * Assert that a function call matching criteria exists in this event range.
616
+ *
617
+ * @example
618
+ * ```typescript
619
+ * result.expect.range(0, 3).containsFunctionCall({ name: 'foo' });
620
+ * ```
621
+ */
622
+ containsFunctionCall(options?: FunctionCallAssertOptions): FunctionCallAssert {
623
+ for (let idx = 0; idx < this._events.length; idx++) {
624
+ const ev = this._events[idx]!;
625
+ const candidate = new EventAssert(ev, this._parent, this._range.start + idx);
626
+ try {
627
+ return candidate.isFunctionCall(options);
628
+ } catch {
629
+ // Continue searching
630
+ }
631
+ }
632
+
633
+ this._parent._raiseWithDebugInfo(
634
+ `No FunctionCallEvent satisfying criteria found in range [${this._range.start}:${this._range.end}]`,
635
+ );
636
+ }
637
+
638
+ /**
639
+ * Assert that a message matching criteria exists in this event range.
640
+ *
641
+ * @example
642
+ * ```typescript
643
+ * result.expect.range(0, 2).containsMessage({ role: 'assistant' });
644
+ * ```
645
+ */
646
+ containsMessage(options?: MessageAssertOptions): MessageAssert {
647
+ for (let idx = 0; idx < this._events.length; idx++) {
648
+ const ev = this._events[idx]!;
649
+ const candidate = new EventAssert(ev, this._parent, this._range.start + idx);
650
+ try {
651
+ return candidate.isMessage(options);
652
+ } catch {
653
+ // Continue searching
654
+ }
655
+ }
656
+
657
+ this._parent._raiseWithDebugInfo(
658
+ `No ChatMessageEvent matching criteria found in range [${this._range.start}:${this._range.end}]`,
659
+ );
660
+ }
661
+
662
/**
 * Search this event range for a function call output that satisfies the given criteria.
 *
 * Events are probed in order with `isFunctionCallOutput`; the result for the
 * first event that passes is returned. When no event passes, the failure is
 * reported via the parent's `_raiseWithDebugInfo`.
 *
 * @param options - Criteria forwarded unchanged to `isFunctionCallOutput`
 * @returns The function call output assertion for the first matching event
 *
 * @example
 * ```typescript
 * result.expect.range(1, 4).containsFunctionCallOutput({ isError: true });
 * ```
 */
containsFunctionCallOutput(options?: FunctionCallOutputAssertOptions): FunctionCallOutputAssert {
  let offset = 0;
  for (const runEvent of this._events) {
    // The probe is indexed by the range start plus the position inside the range.
    const probe = new EventAssert(runEvent, this._parent, this._range.start + offset);
    offset += 1;
    try {
      return probe.isFunctionCallOutput(options);
    } catch {
      // This event did not satisfy the criteria; move on to the next one.
    }
  }

  // Nothing in the range matched: report the failure through the parent.
  const failure = `No FunctionCallOutputEvent matching criteria found in range [${this._range.start}:${this._range.end}]`;
  this._parent._raiseWithDebugInfo(failure);
}
685
+
686
/**
 * Search this event range for an agent handoff that satisfies the given criteria.
 *
 * Events are probed in order with `isAgentHandoff`; the result for the first
 * event that passes is returned. When no event passes, the failure is reported
 * via the parent's `_raiseWithDebugInfo`.
 *
 * @param options - Criteria forwarded unchanged to `isAgentHandoff`
 * @returns The agent handoff assertion for the first matching event
 *
 * @example
 * ```typescript
 * result.expect.range(0, 3).containsAgentHandoff({ newAgentType: MyAgent });
 * ```
 */
containsAgentHandoff(options?: AgentHandoffAssertOptions): AgentHandoffAssert {
  let offset = 0;
  for (const runEvent of this._events) {
    // The probe is indexed by the range start plus the position inside the range.
    const probe = new EventAssert(runEvent, this._parent, this._range.start + offset);
    offset += 1;
    try {
      return probe.isAgentHandoff(options);
    } catch {
      // This event did not satisfy the criteria; move on to the next one.
    }
  }

  // Nothing in the range matched: report the failure through the parent.
  const failure = `No AgentHandoffEvent matching criteria found in range [${this._range.start}:${this._range.end}]`;
  this._parent._raiseWithDebugInfo(failure);
}
709
+ }
710
+
711
/**
 * Assertion wrapper for message events.
 *
 * Narrows the wrapped event to `ChatMessageEvent` and adds `judge`, an
 * LLM-backed check of the message text against a stated intent.
 */
export class MessageAssert extends EventAssert {
  protected declare _event: ChatMessageEvent;

  constructor(event: ChatMessageEvent, parent: RunAssert, index: number) {
    super(event, parent, index);
  }

  /** Returns the wrapped event typed as a chat message event. */
  override event(): ChatMessageEvent {
    return this._event;
  }

  /**
   * Validate the parsed `check_intent` arguments before they are trusted.
   *
   * `JSON.parse` returns untyped data, so a model could send e.g. the string
   * `"false"` for `success`, which is truthy and would otherwise be read as a
   * passing verdict. Only a real boolean is accepted.
   *
   * @param args - Value produced by `JSON.parse` on the tool-call arguments
   * @returns The verdict, or `undefined` when `success` is not a boolean
   */
  private static parseVerdict(args: unknown): { success: boolean; reason: string } | undefined {
    if (typeof args !== 'object' || args === null) {
      return undefined;
    }
    const { success, reason } = args as { success?: unknown; reason?: unknown };
    if (typeof success !== 'boolean') {
      return undefined;
    }
    // A non-string reason is stringified the same way a template literal would.
    return { success, reason: typeof reason === 'string' ? reason : String(reason) };
  }

  /**
   * Evaluate whether the message fulfills the given intent using an LLM.
   *
   * The text parts of the message are sent to `llm` together with the intent,
   * and the model is forced to answer through a `check_intent` tool call. The
   * assertion fails (via `_raise`) when the message has no text, the intent is
   * empty, the model returns no usable arguments, or the verdict is negative.
   *
   * @param llm - LLM instance for judgment
   * @param options - Options containing the intent description
   * @returns Self for chaining further assertions
   *
   * @example
   * ```typescript
   * await result.expect
   *   .nextEvent()
   *   .isMessage({ role: 'assistant' })
   *   .judge(llm, { intent: 'should ask for the drink size' });
   * ```
   */
  async judge(llm: LLM, options: { intent: string }): Promise<MessageAssert> {
    const { intent } = options;

    // Extract text content from message; non-string content parts are ignored.
    const content = this._event.item.content;
    const msgContent =
      typeof content === 'string'
        ? content
        : Array.isArray(content)
          ? content.filter((c): c is string => typeof c === 'string').join(' ')
          : '';

    if (!msgContent) {
      this._raise('The chat message is empty.');
    }

    if (!intent) {
      this._raise('Intent is required to judge the message.');
    }

    // Create the check_intent tool; it simply echoes the verdict back.
    const checkIntentTool = tool({
      description:
        'Determines whether the message correctly fulfills the given intent. ' +
        'Returns success=true if the message satisfies the intent, false otherwise. ' +
        'Provide a concise reason justifying the result.',
      parameters: z.object({
        success: z.boolean().describe('Whether the message satisfies the intent'),
        reason: z.string().describe('A concise explanation justifying the result'),
      }),
      execute: async ({ success, reason }: { success: boolean; reason: string }) => {
        return { success, reason };
      },
    });

    // Create chat context for the judge
    const chatCtx = ChatContext.empty();
    chatCtx.addMessage({
      role: 'system',
      content:
        'You are a test evaluator for conversational agents.\n' +
        'You will be shown a message and a target intent. Determine whether the message accomplishes the intent.\n' +
        'Only respond by calling the `check_intent(success: bool, reason: str)` function with your final judgment.\n' +
        'Be strict: if the message does not clearly fulfill the intent, return `success = false` and explain why.',
    });
    chatCtx.addMessage({
      role: 'user',
      content:
        'Check if the following message fulfills the given intent.\n\n' +
        `Intent:\n${intent}\n\n` +
        `Message:\n${msgContent}`,
    });

    // Call the LLM with the check_intent tool
    let toolArgs: unknown;

    const stream = llm.chat({
      chatCtx,
      toolCtx: { check_intent: checkIntentTool },
      toolChoice: { type: 'function', function: { name: 'check_intent' } },
      extraKwargs: { temperature: 0 },
    });

    for await (const chunk of stream) {
      if (!chunk.delta) continue;

      if (chunk.delta.toolCalls && chunk.delta.toolCalls.length > 0) {
        const toolCall = chunk.delta.toolCalls[0]!;
        if (toolCall.args) {
          try {
            toolArgs = JSON.parse(toolCall.args);
          } catch {
            // Args might be streamed incrementally, keep the last valid parse
          }
        }
      }
    }

    if (!toolArgs) {
      this._raise('LLM did not return any arguments for evaluation.');
    }

    // Reject payloads whose `success` is not a boolean instead of relying on truthiness.
    const verdict = MessageAssert.parseVerdict(toolArgs);
    if (!verdict) {
      this._raise(
        'LLM returned malformed arguments for evaluation: `success` must be a boolean.',
      );
    }

    const { success, reason } = verdict;

    if (!success) {
      this._raise(`Judgment failed: ${reason}`);
    } else if (evalsVerbose) {
      // Escape newlines for short messages too, so the log entry stays on one line.
      const preview = msgContent.slice(0, 30).replace(/\n/g, '\\n');
      const printMsg = msgContent.length > 30 ? preview + '...' : preview;
      console.log(`- Judgment succeeded for \`${printMsg}\`: \`${reason}\``);
    }

    return this;
  }
}
835
+
836
/**
 * Assertion wrapper for function call events.
 *
 * Adds no behavior of its own; it only narrows the wrapped event to
 * `FunctionCallEvent`.
 */
export class FunctionCallAssert extends EventAssert {
  // Type-only redeclaration: narrows the inherited field without emitting code.
  protected declare _event: FunctionCallEvent;

  /**
   * @param event - The function call event being asserted on
   * @param parent - The run-level assertion this wrapper belongs to
   * @param index - Index forwarded to the base class together with the event
   */
  constructor(event: FunctionCallEvent, parent: RunAssert, index: number) {
    super(event, parent, index);
  }

  /** Returns the wrapped event typed as a function call event. */
  override event(): FunctionCallEvent {
    return this._event;
  }
}
850
+
851
/**
 * Assertion wrapper for function call output events.
 *
 * Adds no behavior of its own; it only narrows the wrapped event to
 * `FunctionCallOutputEvent`.
 */
export class FunctionCallOutputAssert extends EventAssert {
  // Type-only redeclaration: narrows the inherited field without emitting code.
  protected declare _event: FunctionCallOutputEvent;

  /**
   * @param event - The function call output event being asserted on
   * @param parent - The run-level assertion this wrapper belongs to
   * @param index - Index forwarded to the base class together with the event
   */
  constructor(event: FunctionCallOutputEvent, parent: RunAssert, index: number) {
    super(event, parent, index);
  }

  /** Returns the wrapped event typed as a function call output event. */
  override event(): FunctionCallOutputEvent {
    return this._event;
  }
}
865
+
866
/**
 * Assertion wrapper for agent handoff events.
 *
 * Adds no behavior of its own; it only narrows the wrapped event to
 * `AgentHandoffEvent`.
 */
export class AgentHandoffAssert extends EventAssert {
  // Type-only redeclaration: narrows the inherited field without emitting code.
  protected declare _event: AgentHandoffEvent;

  /**
   * @param event - The agent handoff event being asserted on
   * @param parent - The run-level assertion this wrapper belongs to
   * @param index - Index forwarded to the base class together with the event
   */
  constructor(event: AgentHandoffEvent, parent: RunAssert, index: number) {
    super(event, parent, index);
  }

  /** Returns the wrapped event typed as an agent handoff event. */
  override event(): AgentHandoffEvent {
    return this._event;
  }
}
880
+
881
/**
 * Error type used to signal a failed test assertion.
 *
 * Behaves like a plain `Error` whose `name` is `'AssertionError'`.
 */
export class AssertionError extends Error {
  /**
   * @param message - Human-readable description of the failed assertion
   */
  constructor(message: string) {
    super(message);
    this.name = 'AssertionError';

    // `captureStackTrace` is not available on every runtime, so call it only
    // when present; passing the constructor omits it from the recorded stack.
    if (Error.captureStackTrace != null) {
      Error.captureStackTrace(this, AssertionError);
    }
  }
}
891
+
892
+ // TODO: mockTools() utility for mocking tool implementations in tests
893
+ // Will be implemented for test suites.
894
+ // See Python run_result.py lines 1010-1031 for reference.
895
+
896
/**
 * Format events for debug output, optionally marking a selected index.
 *
 * Produces one line per event, in order. When `selectedIndex` is given, every
 * line gets a fixed-width prefix: `>>>` on the selected event and equally wide
 * padding on all others, so the `[i]` columns line up. Without `selectedIndex`
 * no prefix is added.
 *
 * Message text and function call output are cut to 50 characters followed by
 * `...`; only the string parts of a message's content are shown.
 *
 * @param events - Events to render
 * @param selectedIndex - Index of the event to highlight, if any
 * @returns The formatted lines, one per event
 */
function formatEvents(events: RunEvent[], selectedIndex?: number): string[] {
  const SELECTED_PREFIX = '>>>';
  // Same width as the marker so selected and unselected lines stay aligned.
  const UNSELECTED_PREFIX = '   ';
  const MAX_TEXT_LENGTH = 50;

  // Shorten long text so each event stays readable on a single debug line.
  const clip = (text: string): string =>
    text.length > MAX_TEXT_LENGTH ? text.slice(0, MAX_TEXT_LENGTH) + '...' : text;

  const lines: string[] = [];

  for (const [i, event] of events.entries()) {
    let prefix = '';
    if (selectedIndex !== undefined) {
      prefix = i === selectedIndex ? SELECTED_PREFIX : UNSELECTED_PREFIX;
    }

    let line: string;
    if (isChatMessageEvent(event)) {
      const { role, content, interrupted } = event.item;
      const textContent =
        typeof content === 'string'
          ? content
          : Array.isArray(content)
            ? content.filter((c): c is string => typeof c === 'string').join(' ')
            : '';
      line = `${prefix}[${i}] { type: "message", role: "${role}", content: "${clip(textContent)}", interrupted: ${interrupted} }`;
    } else if (isFunctionCallEvent(event)) {
      const { name, args } = event.item;
      line = `${prefix}[${i}] { type: "function_call", name: "${name}", args: ${args} }`;
    } else if (isFunctionCallOutputEvent(event)) {
      const { output, isError } = event.item;
      line = `${prefix}[${i}] { type: "function_call_output", output: "${clip(output)}", isError: ${isError} }`;
    } else if (isAgentHandoffEvent(event)) {
      line = `${prefix}[${i}] { type: "agent_handoff", oldAgent: "${event.oldAgent?.constructor.name}", newAgent: "${event.newAgent.constructor.name}" }`;
    } else {
      // Fallback for an event matched by none of the guards above.
      line = `${prefix}[${i}] ${event}`;
    }

    lines.push(line);
  }

  return lines;
}