@livekit/agents 1.0.36 → 1.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs.map +1 -1
- package/dist/inference/api_protos.cjs +68 -0
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +345 -4
- package/dist/inference/api_protos.d.ts +345 -4
- package/dist/inference/api_protos.d.ts.map +1 -1
- package/dist/inference/api_protos.js +60 -0
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/inference/stt.cjs +32 -21
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +34 -21
- package/dist/inference/stt.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/stt/stt.cjs +10 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +12 -0
- package/dist/stt/stt.d.ts +12 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +10 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/traces.cjs +4 -3
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.cts +2 -0
- package/dist/telemetry/traces.d.ts +2 -0
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +4 -3
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/utils.cjs +6 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +2 -0
- package/dist/utils.d.ts +2 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +6 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent.cjs +5 -0
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +5 -0
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +49 -23
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +1 -1
- package/dist/voice/agent_activity.d.ts +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +50 -24
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +7 -5
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +5 -2
- package/dist/voice/agent_session.d.ts +5 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +7 -5
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +3 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +3 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +6 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -1
- package/dist/voice/avatar/datastream_io.d.cts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
- package/dist/voice/avatar/datastream_io.js +6 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -1
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/generation.cjs +14 -5
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -2
- package/dist/voice/generation.d.ts +3 -2
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +14 -5
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/io.cjs +12 -0
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +19 -1
- package/dist/voice/io.d.ts +19 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +12 -0
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +91 -28
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.cts +7 -1
- package/dist/voice/recorder_io/recorder_io.d.ts +7 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +91 -28
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +40 -11
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +4 -1
- package/dist/voice/room_io/_input.d.ts +4 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +31 -2
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +6 -0
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.d.cts +1 -0
- package/dist/voice/room_io/_output.d.ts +1 -0
- package/dist/voice/room_io/_output.d.ts.map +1 -1
- package/dist/voice/room_io/_output.js +6 -0
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +2 -2
- package/dist/voice/room_io/room_io.d.ts +2 -2
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +2 -0
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +3 -0
- package/dist/voice/speech_handle.d.ts +3 -0
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +2 -0
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/index.cjs +2 -0
- package/dist/voice/testing/index.cjs.map +1 -1
- package/dist/voice/testing/index.d.cts +1 -1
- package/dist/voice/testing/index.d.ts +1 -1
- package/dist/voice/testing/index.d.ts.map +1 -1
- package/dist/voice/testing/index.js +2 -0
- package/dist/voice/testing/index.js.map +1 -1
- package/dist/voice/testing/run_result.cjs +294 -5
- package/dist/voice/testing/run_result.cjs.map +1 -1
- package/dist/voice/testing/run_result.d.cts +149 -1
- package/dist/voice/testing/run_result.d.ts +149 -1
- package/dist/voice/testing/run_result.d.ts.map +1 -1
- package/dist/voice/testing/run_result.js +293 -5
- package/dist/voice/testing/run_result.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/api_protos.ts +83 -0
- package/src/inference/stt.ts +39 -22
- package/src/stt/stt.ts +21 -0
- package/src/telemetry/traces.ts +6 -2
- package/src/utils.ts +7 -0
- package/src/voice/agent.ts +9 -0
- package/src/voice/agent_activity.ts +72 -26
- package/src/voice/agent_session.ts +6 -5
- package/src/voice/audio_recognition.ts +2 -0
- package/src/voice/avatar/datastream_io.ts +8 -0
- package/src/voice/generation.ts +24 -12
- package/src/voice/io.ts +27 -5
- package/src/voice/recorder_io/recorder_io.ts +123 -31
- package/src/voice/room_io/_input.ts +32 -4
- package/src/voice/room_io/_output.ts +8 -0
- package/src/voice/room_io/room_io.ts +3 -1
- package/src/voice/speech_handle.ts +4 -0
- package/src/voice/testing/index.ts +1 -0
- package/src/voice/testing/run_result.ts +373 -12
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
import type { AgentHandoffItem, ChatItem, ChatRole } from '../../llm/chat_context.js';
|
|
6
|
+
import { ChatContext } from '../../llm/chat_context.js';
|
|
7
|
+
import type { LLM } from '../../llm/llm.js';
|
|
8
|
+
import { tool } from '../../llm/tool_context.js';
|
|
5
9
|
import type { Task } from '../../utils.js';
|
|
6
10
|
import { Future } from '../../utils.js';
|
|
7
11
|
import type { Agent } from '../agent.js';
|
|
@@ -23,6 +27,10 @@ import {
|
|
|
23
27
|
isFunctionCallOutputEvent,
|
|
24
28
|
} from './types.js';
|
|
25
29
|
|
|
30
|
+
// Type for agent constructor (used in assertions)
|
|
31
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
32
|
+
type AgentConstructor = new (...args: any[]) => Agent;
|
|
33
|
+
|
|
26
34
|
// Environment variable for verbose output
|
|
27
35
|
const evalsVerbose = parseInt(process.env.LIVEKIT_EVALS_VERBOSE || '0', 10);
|
|
28
36
|
|
|
@@ -141,11 +149,11 @@ export class RunResult<T = unknown> {
|
|
|
141
149
|
let event: RunEvent | undefined;
|
|
142
150
|
|
|
143
151
|
if (item.type === 'message') {
|
|
144
|
-
event = { type: 'message', item }
|
|
152
|
+
event = { type: 'message', item };
|
|
145
153
|
} else if (item.type === 'function_call') {
|
|
146
|
-
event = { type: 'function_call', item }
|
|
154
|
+
event = { type: 'function_call', item };
|
|
147
155
|
} else if (item.type === 'function_call_output') {
|
|
148
|
-
event = { type: 'function_call_output', item }
|
|
156
|
+
event = { type: 'function_call_output', item };
|
|
149
157
|
}
|
|
150
158
|
|
|
151
159
|
if (event) {
|
|
@@ -223,11 +231,6 @@ export class RunAssert {
|
|
|
223
231
|
private _events: RunEvent[];
|
|
224
232
|
private _currentIndex = 0;
|
|
225
233
|
|
|
226
|
-
// TODO(brian): Add range access for parity with Python __getitem__ slice support.
|
|
227
|
-
// - Add range(start?, end?) method returning EventRangeAssert
|
|
228
|
-
// - EventRangeAssert should have containsFunctionCall(), containsMessage() methods
|
|
229
|
-
// See Python run_result.py lines 247-251 for reference.
|
|
230
|
-
|
|
231
234
|
constructor(runResult: RunResult) {
|
|
232
235
|
this._events = runResult.events;
|
|
233
236
|
}
|
|
@@ -296,6 +299,141 @@ export class RunAssert {
|
|
|
296
299
|
return this;
|
|
297
300
|
}
|
|
298
301
|
|
|
302
|
+
/**
|
|
303
|
+
* Conditionally skip the next event if it matches the specified criteria.
|
|
304
|
+
* Returns the event assertion if matched and skipped, or undefined if not matched.
|
|
305
|
+
*
|
|
306
|
+
* @example
|
|
307
|
+
* ```typescript
|
|
308
|
+
* // Skip optional assistant message before function call
|
|
309
|
+
* result.expect.skipNextEventIf({ type: 'message', role: 'assistant' });
|
|
310
|
+
* result.expect.nextEvent().isFunctionCall({ name: 'foo' });
|
|
311
|
+
* ```
|
|
312
|
+
*/
|
|
313
|
+
skipNextEventIf(
|
|
314
|
+
options:
|
|
315
|
+
| { type: 'message'; role?: ChatRole }
|
|
316
|
+
| { type: 'function_call'; name?: string; args?: Record<string, unknown> }
|
|
317
|
+
| { type: 'function_call_output'; output?: string; isError?: boolean }
|
|
318
|
+
| { type: 'agent_handoff'; newAgentType?: AgentConstructor },
|
|
319
|
+
):
|
|
320
|
+
| MessageAssert
|
|
321
|
+
| FunctionCallAssert
|
|
322
|
+
| FunctionCallOutputAssert
|
|
323
|
+
| AgentHandoffAssert
|
|
324
|
+
| undefined {
|
|
325
|
+
if (this._currentIndex >= this._events.length) {
|
|
326
|
+
return undefined;
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
try {
|
|
330
|
+
const evAssert = this._currentEvent();
|
|
331
|
+
|
|
332
|
+
if (options.type === 'message') {
|
|
333
|
+
const { role } = options;
|
|
334
|
+
const result = evAssert.isMessage({ role });
|
|
335
|
+
this._currentIndex++;
|
|
336
|
+
return result;
|
|
337
|
+
} else if (options.type === 'function_call') {
|
|
338
|
+
const { name, args } = options;
|
|
339
|
+
const result = evAssert.isFunctionCall({
|
|
340
|
+
name,
|
|
341
|
+
args,
|
|
342
|
+
});
|
|
343
|
+
this._currentIndex++;
|
|
344
|
+
return result;
|
|
345
|
+
} else if (options.type === 'function_call_output') {
|
|
346
|
+
const { output, isError } = options;
|
|
347
|
+
const result = evAssert.isFunctionCallOutput({
|
|
348
|
+
output,
|
|
349
|
+
isError,
|
|
350
|
+
});
|
|
351
|
+
this._currentIndex++;
|
|
352
|
+
return result;
|
|
353
|
+
} else if (options.type === 'agent_handoff') {
|
|
354
|
+
const { newAgentType } = options;
|
|
355
|
+
const result = evAssert.isAgentHandoff({ newAgentType });
|
|
356
|
+
this._currentIndex++;
|
|
357
|
+
return result;
|
|
358
|
+
}
|
|
359
|
+
} catch {
|
|
360
|
+
// Assertion failed, event doesn't match criteria
|
|
361
|
+
return undefined;
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
return undefined;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
/**
|
|
368
|
+
* Get an EventRangeAssert for a range of events.
|
|
369
|
+
* Similar to Python's slice access: expect[0:3] or expect[:]
|
|
370
|
+
*
|
|
371
|
+
* @param start - Start index (inclusive), defaults to 0
|
|
372
|
+
* @param end - End index (exclusive), defaults to events.length
|
|
373
|
+
*
|
|
374
|
+
* @example
|
|
375
|
+
* ```typescript
|
|
376
|
+
* // Search all events
|
|
377
|
+
* result.expect.range().containsFunctionCall({ name: 'foo' });
|
|
378
|
+
* // Search first 3 events
|
|
379
|
+
* result.expect.range(0, 3).containsMessage({ role: 'assistant' });
|
|
380
|
+
* ```
|
|
381
|
+
*/
|
|
382
|
+
range(start?: number, end?: number): EventRangeAssert {
|
|
383
|
+
const startIdx = start ?? 0;
|
|
384
|
+
const endIdx = end ?? this._events.length;
|
|
385
|
+
const events = this._events.slice(startIdx, endIdx);
|
|
386
|
+
return new EventRangeAssert(events, this, { start: startIdx, end: endIdx });
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
/**
|
|
390
|
+
* Assert that a function call matching criteria exists anywhere in the events.
|
|
391
|
+
*
|
|
392
|
+
* @example
|
|
393
|
+
* ```typescript
|
|
394
|
+
* result.expect.containsFunctionCall({ name: 'order_item' });
|
|
395
|
+
* ```
|
|
396
|
+
*/
|
|
397
|
+
containsFunctionCall(options?: FunctionCallAssertOptions): FunctionCallAssert {
|
|
398
|
+
return this.range().containsFunctionCall(options);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
/**
|
|
402
|
+
* Assert that a message matching criteria exists anywhere in the events.
|
|
403
|
+
*
|
|
404
|
+
* @example
|
|
405
|
+
* ```typescript
|
|
406
|
+
* result.expect.containsMessage({ role: 'assistant' });
|
|
407
|
+
* ```
|
|
408
|
+
*/
|
|
409
|
+
containsMessage(options?: MessageAssertOptions): MessageAssert {
|
|
410
|
+
return this.range().containsMessage(options);
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
/**
|
|
414
|
+
* Assert that a function call output matching criteria exists anywhere in the events.
|
|
415
|
+
*
|
|
416
|
+
* @example
|
|
417
|
+
* ```typescript
|
|
418
|
+
* result.expect.containsFunctionCallOutput({ isError: false });
|
|
419
|
+
* ```
|
|
420
|
+
*/
|
|
421
|
+
containsFunctionCallOutput(options?: FunctionCallOutputAssertOptions): FunctionCallOutputAssert {
|
|
422
|
+
return this.range().containsFunctionCallOutput(options);
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
/**
|
|
426
|
+
* Assert that an agent handoff matching criteria exists anywhere in the events.
|
|
427
|
+
*
|
|
428
|
+
* @example
|
|
429
|
+
* ```typescript
|
|
430
|
+
* result.expect.containsAgentHandoff({ newAgentType: MyAgent });
|
|
431
|
+
* ```
|
|
432
|
+
*/
|
|
433
|
+
containsAgentHandoff(options?: AgentHandoffAssertOptions): AgentHandoffAssert {
|
|
434
|
+
return this.range().containsAgentHandoff(options);
|
|
435
|
+
}
|
|
436
|
+
|
|
299
437
|
/**
|
|
300
438
|
* Assert that there are no further events.
|
|
301
439
|
*
|
|
@@ -445,8 +583,7 @@ export class EventAssert {
|
|
|
445
583
|
this._raise(`Expected AgentHandoffEvent, got ${this._event.type}`);
|
|
446
584
|
}
|
|
447
585
|
|
|
448
|
-
|
|
449
|
-
const event = this._event as AgentHandoffEvent;
|
|
586
|
+
const event = this._event;
|
|
450
587
|
|
|
451
588
|
if (options?.newAgentType) {
|
|
452
589
|
const actualType = event.newAgent.constructor.name;
|
|
@@ -459,6 +596,118 @@ export class EventAssert {
|
|
|
459
596
|
}
|
|
460
597
|
}
|
|
461
598
|
|
|
599
|
+
/**
|
|
600
|
+
* Assertion wrapper for a range of events.
|
|
601
|
+
* Provides contains*() methods to search within the range.
|
|
602
|
+
*/
|
|
603
|
+
export class EventRangeAssert {
|
|
604
|
+
private _events: RunEvent[];
|
|
605
|
+
private _parent: RunAssert;
|
|
606
|
+
private _range: { start: number; end: number };
|
|
607
|
+
|
|
608
|
+
constructor(events: RunEvent[], parent: RunAssert, range: { start: number; end: number }) {
|
|
609
|
+
this._events = events;
|
|
610
|
+
this._parent = parent;
|
|
611
|
+
this._range = range;
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
/**
|
|
615
|
+
* Assert that a function call matching criteria exists in this event range.
|
|
616
|
+
*
|
|
617
|
+
* @example
|
|
618
|
+
* ```typescript
|
|
619
|
+
* result.expect.range(0, 3).containsFunctionCall({ name: 'foo' });
|
|
620
|
+
* ```
|
|
621
|
+
*/
|
|
622
|
+
containsFunctionCall(options?: FunctionCallAssertOptions): FunctionCallAssert {
|
|
623
|
+
for (let idx = 0; idx < this._events.length; idx++) {
|
|
624
|
+
const ev = this._events[idx]!;
|
|
625
|
+
const candidate = new EventAssert(ev, this._parent, this._range.start + idx);
|
|
626
|
+
try {
|
|
627
|
+
return candidate.isFunctionCall(options);
|
|
628
|
+
} catch {
|
|
629
|
+
// Continue searching
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
this._parent._raiseWithDebugInfo(
|
|
634
|
+
`No FunctionCallEvent satisfying criteria found in range [${this._range.start}:${this._range.end}]`,
|
|
635
|
+
);
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
/**
|
|
639
|
+
* Assert that a message matching criteria exists in this event range.
|
|
640
|
+
*
|
|
641
|
+
* @example
|
|
642
|
+
* ```typescript
|
|
643
|
+
* result.expect.range(0, 2).containsMessage({ role: 'assistant' });
|
|
644
|
+
* ```
|
|
645
|
+
*/
|
|
646
|
+
containsMessage(options?: MessageAssertOptions): MessageAssert {
|
|
647
|
+
for (let idx = 0; idx < this._events.length; idx++) {
|
|
648
|
+
const ev = this._events[idx]!;
|
|
649
|
+
const candidate = new EventAssert(ev, this._parent, this._range.start + idx);
|
|
650
|
+
try {
|
|
651
|
+
return candidate.isMessage(options);
|
|
652
|
+
} catch {
|
|
653
|
+
// Continue searching
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
this._parent._raiseWithDebugInfo(
|
|
658
|
+
`No ChatMessageEvent matching criteria found in range [${this._range.start}:${this._range.end}]`,
|
|
659
|
+
);
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
/**
|
|
663
|
+
* Assert that a function call output matching criteria exists in this event range.
|
|
664
|
+
*
|
|
665
|
+
* @example
|
|
666
|
+
* ```typescript
|
|
667
|
+
* result.expect.range(1, 4).containsFunctionCallOutput({ isError: true });
|
|
668
|
+
* ```
|
|
669
|
+
*/
|
|
670
|
+
containsFunctionCallOutput(options?: FunctionCallOutputAssertOptions): FunctionCallOutputAssert {
|
|
671
|
+
for (let idx = 0; idx < this._events.length; idx++) {
|
|
672
|
+
const ev = this._events[idx]!;
|
|
673
|
+
const candidate = new EventAssert(ev, this._parent, this._range.start + idx);
|
|
674
|
+
try {
|
|
675
|
+
return candidate.isFunctionCallOutput(options);
|
|
676
|
+
} catch {
|
|
677
|
+
// Continue searching
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
this._parent._raiseWithDebugInfo(
|
|
682
|
+
`No FunctionCallOutputEvent matching criteria found in range [${this._range.start}:${this._range.end}]`,
|
|
683
|
+
);
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
/**
|
|
687
|
+
* Assert that an agent handoff matching criteria exists in this event range.
|
|
688
|
+
*
|
|
689
|
+
* @example
|
|
690
|
+
* ```typescript
|
|
691
|
+
* result.expect.range(0, 3).containsAgentHandoff({ newAgentType: MyAgent });
|
|
692
|
+
* ```
|
|
693
|
+
*/
|
|
694
|
+
containsAgentHandoff(options?: AgentHandoffAssertOptions): AgentHandoffAssert {
|
|
695
|
+
for (let idx = 0; idx < this._events.length; idx++) {
|
|
696
|
+
const ev = this._events[idx]!;
|
|
697
|
+
const candidate = new EventAssert(ev, this._parent, this._range.start + idx);
|
|
698
|
+
try {
|
|
699
|
+
return candidate.isAgentHandoff(options);
|
|
700
|
+
} catch {
|
|
701
|
+
// Continue searching
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
this._parent._raiseWithDebugInfo(
|
|
706
|
+
`No AgentHandoffEvent matching criteria found in range [${this._range.start}:${this._range.end}]`,
|
|
707
|
+
);
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
|
|
462
711
|
/**
|
|
463
712
|
* Assertion wrapper for message events.
|
|
464
713
|
*/
|
|
@@ -473,7 +722,115 @@ export class MessageAssert extends EventAssert {
|
|
|
473
722
|
return this._event;
|
|
474
723
|
}
|
|
475
724
|
|
|
476
|
-
|
|
725
|
+
/**
|
|
726
|
+
* Evaluate whether the message fulfills the given intent using an LLM.
|
|
727
|
+
*
|
|
728
|
+
* @param llm - LLM instance for judgment
|
|
729
|
+
* @param options - Options containing the intent description
|
|
730
|
+
* @returns Self for chaining further assertions
|
|
731
|
+
*
|
|
732
|
+
* @example
|
|
733
|
+
* ```typescript
|
|
734
|
+
* await result.expect
|
|
735
|
+
* .nextEvent()
|
|
736
|
+
* .isMessage({ role: 'assistant' })
|
|
737
|
+
* .judge(llm, { intent: 'should ask for the drink size' });
|
|
738
|
+
* ```
|
|
739
|
+
*/
|
|
740
|
+
async judge(llm: LLM, options: { intent: string }): Promise<MessageAssert> {
|
|
741
|
+
const { intent } = options;
|
|
742
|
+
|
|
743
|
+
// Extract text content from message
|
|
744
|
+
const content = this._event.item.content;
|
|
745
|
+
const msgContent =
|
|
746
|
+
typeof content === 'string'
|
|
747
|
+
? content
|
|
748
|
+
: Array.isArray(content)
|
|
749
|
+
? content.filter((c): c is string => typeof c === 'string').join(' ')
|
|
750
|
+
: '';
|
|
751
|
+
|
|
752
|
+
if (!msgContent) {
|
|
753
|
+
this._raise('The chat message is empty.');
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
if (!intent) {
|
|
757
|
+
this._raise('Intent is required to judge the message.');
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Create the check_intent tool
|
|
761
|
+
const checkIntentTool = tool({
|
|
762
|
+
description:
|
|
763
|
+
'Determines whether the message correctly fulfills the given intent. ' +
|
|
764
|
+
'Returns success=true if the message satisfies the intent, false otherwise. ' +
|
|
765
|
+
'Provide a concise reason justifying the result.',
|
|
766
|
+
parameters: z.object({
|
|
767
|
+
success: z.boolean().describe('Whether the message satisfies the intent'),
|
|
768
|
+
reason: z.string().describe('A concise explanation justifying the result'),
|
|
769
|
+
}),
|
|
770
|
+
execute: async ({ success, reason }: { success: boolean; reason: string }) => {
|
|
771
|
+
return { success, reason };
|
|
772
|
+
},
|
|
773
|
+
});
|
|
774
|
+
|
|
775
|
+
// Create chat context for the judge
|
|
776
|
+
const chatCtx = ChatContext.empty();
|
|
777
|
+
chatCtx.addMessage({
|
|
778
|
+
role: 'system',
|
|
779
|
+
content:
|
|
780
|
+
'You are a test evaluator for conversational agents.\n' +
|
|
781
|
+
'You will be shown a message and a target intent. Determine whether the message accomplishes the intent.\n' +
|
|
782
|
+
'Only respond by calling the `check_intent(success: bool, reason: str)` function with your final judgment.\n' +
|
|
783
|
+
'Be strict: if the message does not clearly fulfill the intent, return `success = false` and explain why.',
|
|
784
|
+
});
|
|
785
|
+
chatCtx.addMessage({
|
|
786
|
+
role: 'user',
|
|
787
|
+
content:
|
|
788
|
+
'Check if the following message fulfills the given intent.\n\n' +
|
|
789
|
+
`Intent:\n${intent}\n\n` +
|
|
790
|
+
`Message:\n${msgContent}`,
|
|
791
|
+
});
|
|
792
|
+
|
|
793
|
+
// Call the LLM with the check_intent tool
|
|
794
|
+
let toolArgs: { success: boolean; reason: string } | undefined;
|
|
795
|
+
|
|
796
|
+
const stream = llm.chat({
|
|
797
|
+
chatCtx,
|
|
798
|
+
toolCtx: { check_intent: checkIntentTool },
|
|
799
|
+
toolChoice: { type: 'function', function: { name: 'check_intent' } },
|
|
800
|
+
extraKwargs: { temperature: 0 },
|
|
801
|
+
});
|
|
802
|
+
|
|
803
|
+
for await (const chunk of stream) {
|
|
804
|
+
if (!chunk.delta) continue;
|
|
805
|
+
|
|
806
|
+
if (chunk.delta.toolCalls && chunk.delta.toolCalls.length > 0) {
|
|
807
|
+
const toolCall = chunk.delta.toolCalls[0]!;
|
|
808
|
+
if (toolCall.args) {
|
|
809
|
+
try {
|
|
810
|
+
toolArgs = JSON.parse(toolCall.args);
|
|
811
|
+
} catch {
|
|
812
|
+
// Args might be streamed incrementally, keep the last valid parse
|
|
813
|
+
}
|
|
814
|
+
}
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
if (!toolArgs) {
|
|
819
|
+
this._raise('LLM did not return any arguments for evaluation.');
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
const { success, reason } = toolArgs;
|
|
823
|
+
|
|
824
|
+
if (!success) {
|
|
825
|
+
this._raise(`Judgment failed: ${reason}`);
|
|
826
|
+
} else if (evalsVerbose) {
|
|
827
|
+
const printMsg =
|
|
828
|
+
msgContent.length > 30 ? msgContent.slice(0, 30).replace(/\n/g, '\\n') + '...' : msgContent;
|
|
829
|
+
console.log(`- Judgment succeeded for \`${printMsg}\`: \`${reason}\``);
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
return this;
|
|
833
|
+
}
|
|
477
834
|
}
|
|
478
835
|
|
|
479
836
|
/**
|
|
@@ -532,6 +889,10 @@ export class AssertionError extends Error {
|
|
|
532
889
|
}
|
|
533
890
|
}
|
|
534
891
|
|
|
892
|
+
// TODO: mockTools() utility for mocking tool implementations in tests
|
|
893
|
+
// Will be implemented for test suites.
|
|
894
|
+
// See Python run_result.py lines 1010-1031 for reference.
|
|
895
|
+
|
|
535
896
|
/**
|
|
536
897
|
* Format events for debug output, optionally marking a selected index.
|
|
537
898
|
*/
|