@langwatch/scenario 0.2.13 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/dist/index.d.mts +433 -256
- package/dist/index.d.ts +433 -256
- package/dist/index.js +2221 -516
- package/dist/index.mjs +2611 -303
- package/dist/integrations/vitest/config.mjs +0 -2
- package/dist/integrations/vitest/reporter.js +36 -11
- package/dist/integrations/vitest/reporter.mjs +159 -8
- package/dist/integrations/vitest/setup-global.mjs +0 -2
- package/dist/integrations/vitest/setup.js +85 -53
- package/dist/integrations/vitest/setup.mjs +619 -18
- package/package.json +46 -30
- package/dist/chunk-6SKQWXT7.mjs +0 -528
- package/dist/chunk-7P6ASYW6.mjs +0 -9
- package/dist/chunk-OL4RFXV4.mjs +0 -133
package/dist/index.d.mts
CHANGED
|
@@ -1,7 +1,18 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
|
-
import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
3
|
-
import { z } from 'zod';
|
|
2
|
+
import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel, generateText, ModelMessage } from 'ai';
|
|
3
|
+
import { z } from 'zod/v4';
|
|
4
|
+
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
|
+
import { RealtimeSession } from '@openai/agents/realtime';
|
|
4
6
|
import { Observable } from 'rxjs';
|
|
7
|
+
import { z as z$1 } from 'zod';
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* The possible return types from an agent's `call` method.
|
|
11
|
+
* - string | CoreMessage | CoreMessage[]: Agent generated response
|
|
12
|
+
* - JudgeResult: Judge made a final decision
|
|
13
|
+
* - null: Judge wants to continue observing (no decision yet)
|
|
14
|
+
*/
|
|
15
|
+
type AgentReturnTypes = string | CoreMessage | CoreMessage[] | JudgeResult | null;
|
|
5
16
|
|
|
6
17
|
declare enum AgentRole {
|
|
7
18
|
USER = "User",
|
|
@@ -42,11 +53,6 @@ interface AgentInput {
|
|
|
42
53
|
*/
|
|
43
54
|
scenarioConfig: ScenarioConfig;
|
|
44
55
|
}
|
|
45
|
-
/**
|
|
46
|
-
* The possible return types from an agent's `call` method.
|
|
47
|
-
* Can be a simple string, a single message, an array of messages, or a ScenarioResult.
|
|
48
|
-
*/
|
|
49
|
-
type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
|
|
50
56
|
/**
|
|
51
57
|
* Abstract base class for integrating custom agents with the Scenario framework.
|
|
52
58
|
*
|
|
@@ -71,6 +77,7 @@ type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
|
|
|
71
77
|
* ```
|
|
72
78
|
*/
|
|
73
79
|
declare abstract class AgentAdapter {
|
|
80
|
+
name?: string;
|
|
74
81
|
role: AgentRole;
|
|
75
82
|
/**
|
|
76
83
|
* Process the input and generate a response.
|
|
@@ -88,33 +95,21 @@ declare abstract class AgentAdapter {
|
|
|
88
95
|
* Abstract base class for user simulator agents.
|
|
89
96
|
* User simulator agents are responsible for generating user messages to drive the conversation.
|
|
90
97
|
*/
|
|
91
|
-
declare abstract class UserSimulatorAgentAdapter
|
|
98
|
+
declare abstract class UserSimulatorAgentAdapter extends AgentAdapter {
|
|
99
|
+
name: string;
|
|
92
100
|
role: AgentRole;
|
|
93
|
-
/**
|
|
94
|
-
* Process the input and generate a user message.
|
|
95
|
-
*
|
|
96
|
-
* @param input AgentInput containing conversation history, thread context, and scenario state.
|
|
97
|
-
* @returns The user's response.
|
|
98
|
-
*/
|
|
99
|
-
abstract call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
100
101
|
}
|
|
101
102
|
/**
|
|
102
103
|
* Abstract base class for judge agents.
|
|
103
104
|
* Judge agents are responsible for evaluating the conversation and determining success or failure.
|
|
104
105
|
*/
|
|
105
|
-
declare abstract class JudgeAgentAdapter
|
|
106
|
+
declare abstract class JudgeAgentAdapter extends AgentAdapter {
|
|
107
|
+
name: string;
|
|
106
108
|
role: AgentRole;
|
|
107
109
|
/**
|
|
108
110
|
* The criteria the judge will use to evaluate the conversation.
|
|
109
111
|
*/
|
|
110
112
|
abstract criteria: string[];
|
|
111
|
-
/**
|
|
112
|
-
* Process the input and evaluate the conversation.
|
|
113
|
-
*
|
|
114
|
-
* @param input AgentInput containing conversation history, thread context, and scenario state.
|
|
115
|
-
* @returns A ScenarioResult if the conversation should end, otherwise should continue.
|
|
116
|
-
*/
|
|
117
|
-
abstract call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
118
113
|
}
|
|
119
114
|
|
|
120
115
|
declare const DEFAULT_MAX_TURNS = 10;
|
|
@@ -249,7 +244,7 @@ interface ScenarioExecutionLike {
|
|
|
249
244
|
* A step in a scenario script.
|
|
250
245
|
* This is a function that takes the current state and an executor, and performs an action.
|
|
251
246
|
*/
|
|
252
|
-
type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutionLike) => Promise<void
|
|
247
|
+
type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutionLike) => Promise<void> | void;
|
|
253
248
|
|
|
254
249
|
/**
|
|
255
250
|
* Represents the result of a scenario execution.
|
|
@@ -348,38 +343,14 @@ interface ScenarioExecutionStateLike {
|
|
|
348
343
|
hasToolCall(toolName: string): boolean;
|
|
349
344
|
}
|
|
350
345
|
|
|
351
|
-
/** Default temperature for language model inference */
|
|
352
|
-
declare const DEFAULT_TEMPERATURE = 0;
|
|
353
346
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
354
347
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
355
|
-
model: z.
|
|
348
|
+
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
356
349
|
temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
357
350
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
358
|
-
},
|
|
359
|
-
model: ai.LanguageModelV1;
|
|
360
|
-
temperature: number;
|
|
361
|
-
maxTokens?: number | undefined;
|
|
362
|
-
}, {
|
|
363
|
-
model: ai.LanguageModelV1;
|
|
364
|
-
temperature?: number | undefined;
|
|
365
|
-
maxTokens?: number | undefined;
|
|
366
|
-
}>>;
|
|
351
|
+
}, z.core.$strip>>;
|
|
367
352
|
headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
368
|
-
},
|
|
369
|
-
headless: boolean;
|
|
370
|
-
defaultModel?: {
|
|
371
|
-
model: ai.LanguageModelV1;
|
|
372
|
-
temperature: number;
|
|
373
|
-
maxTokens?: number | undefined;
|
|
374
|
-
} | undefined;
|
|
375
|
-
}, {
|
|
376
|
-
defaultModel?: {
|
|
377
|
-
model: ai.LanguageModelV1;
|
|
378
|
-
temperature?: number | undefined;
|
|
379
|
-
maxTokens?: number | undefined;
|
|
380
|
-
} | undefined;
|
|
381
|
-
headless?: boolean | undefined;
|
|
382
|
-
}>;
|
|
353
|
+
}, z.core.$strict>;
|
|
383
354
|
type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
|
|
384
355
|
declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
|
|
385
356
|
|
|
@@ -390,7 +361,6 @@ type domain_AgentReturnTypes = AgentReturnTypes;
|
|
|
390
361
|
type domain_AgentRole = AgentRole;
|
|
391
362
|
declare const domain_AgentRole: typeof AgentRole;
|
|
392
363
|
declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
|
|
393
|
-
declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
|
|
394
364
|
declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
|
|
395
365
|
type domain_JudgeAgentAdapter = JudgeAgentAdapter;
|
|
396
366
|
declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
@@ -407,32 +377,33 @@ declare const domain_allAgentRoles: typeof allAgentRoles;
|
|
|
407
377
|
declare const domain_defineConfig: typeof defineConfig;
|
|
408
378
|
declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
|
|
409
379
|
declare namespace domain {
|
|
410
|
-
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS,
|
|
380
|
+
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
|
|
411
381
|
}
|
|
412
382
|
|
|
413
383
|
/**
|
|
414
|
-
*
|
|
384
|
+
* Schema for a language model.
|
|
415
385
|
*/
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
386
|
+
declare const modelSchema: z.ZodObject<{
|
|
387
|
+
model: z.ZodCustom<LanguageModel, LanguageModel>;
|
|
388
|
+
temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
389
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
390
|
+
}, z.core.$strip>;
|
|
391
|
+
type ModelConfig = z.infer<typeof modelSchema>;
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* Parameters for LLM invocation.
|
|
395
|
+
* Derived from generateText parameters for now.
|
|
396
|
+
*/
|
|
397
|
+
type InvokeLLMParams = Parameters<typeof generateText>[0];
|
|
398
|
+
/**
|
|
399
|
+
* Result from LLM invocation.
|
|
400
|
+
* Derived from generateText return type for now.
|
|
401
|
+
*/
|
|
402
|
+
type InvokeLLMResult = Pick<Awaited<ReturnType<typeof generateText>>, "text" | "content" | "toolCalls" | "toolResults">;
|
|
432
403
|
/**
|
|
433
404
|
* General configuration for a testing agent.
|
|
434
405
|
*/
|
|
435
|
-
interface TestingAgentConfig extends
|
|
406
|
+
interface TestingAgentConfig extends Partial<ModelConfig> {
|
|
436
407
|
/**
|
|
437
408
|
* The name of the agent.
|
|
438
409
|
*/
|
|
@@ -464,6 +435,35 @@ interface FinishTestArgs {
|
|
|
464
435
|
verdict: "success" | "failure" | "inconclusive";
|
|
465
436
|
}
|
|
466
437
|
|
|
438
|
+
interface JudgeResult {
|
|
439
|
+
success: boolean;
|
|
440
|
+
reasoning: string;
|
|
441
|
+
metCriteria: string[];
|
|
442
|
+
unmetCriteria: string[];
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
/**
|
|
446
|
+
* Collects OpenTelemetry spans for judge evaluation.
|
|
447
|
+
* Implements SpanProcessor to intercept spans as they complete.
|
|
448
|
+
*/
|
|
449
|
+
declare class JudgeSpanCollector implements SpanProcessor {
|
|
450
|
+
private spans;
|
|
451
|
+
onStart(): void;
|
|
452
|
+
onEnd(span: ReadableSpan): void;
|
|
453
|
+
forceFlush(): Promise<void>;
|
|
454
|
+
shutdown(): Promise<void>;
|
|
455
|
+
/**
|
|
456
|
+
* Retrieves all spans associated with a specific thread.
|
|
457
|
+
* @param threadId - The thread identifier to filter spans by
|
|
458
|
+
* @returns Array of spans for the given thread
|
|
459
|
+
*/
|
|
460
|
+
getSpansForThread(threadId: string): ReadableSpan[];
|
|
461
|
+
}
|
|
462
|
+
/**
|
|
463
|
+
* Singleton instance of the judge span collector.
|
|
464
|
+
*/
|
|
465
|
+
declare const judgeSpanCollector: JudgeSpanCollector;
|
|
466
|
+
|
|
467
467
|
/**
|
|
468
468
|
* Configuration for the judge agent.
|
|
469
469
|
*/
|
|
@@ -476,6 +476,10 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
476
476
|
* The criteria that the judge will use to evaluate the conversation.
|
|
477
477
|
*/
|
|
478
478
|
criteria: string[];
|
|
479
|
+
/**
|
|
480
|
+
* Optional span collector for telemetry. Defaults to global singleton.
|
|
481
|
+
*/
|
|
482
|
+
spanCollector?: JudgeSpanCollector;
|
|
479
483
|
}
|
|
480
484
|
/**
|
|
481
485
|
* Agent that evaluates conversations against success criteria.
|
|
@@ -489,17 +493,16 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
489
493
|
declare class JudgeAgent extends JudgeAgentAdapter {
|
|
490
494
|
private readonly cfg;
|
|
491
495
|
private logger;
|
|
496
|
+
private readonly spanCollector;
|
|
492
497
|
role: AgentRole;
|
|
493
498
|
criteria: string[];
|
|
499
|
+
/**
|
|
500
|
+
* LLM invocation function. Can be overridden to customize LLM behavior.
|
|
501
|
+
*/
|
|
502
|
+
invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
|
|
494
503
|
constructor(cfg: JudgeAgentConfig);
|
|
495
|
-
call(input: AgentInput): Promise<
|
|
496
|
-
|
|
497
|
-
messages: CoreMessage[];
|
|
498
|
-
reasoning: string;
|
|
499
|
-
metCriteria: string[];
|
|
500
|
-
unmetCriteria: string[];
|
|
501
|
-
}>;
|
|
502
|
-
private generateText;
|
|
504
|
+
call(input: AgentInput): Promise<JudgeResult | null>;
|
|
505
|
+
private getOpenTelemetryTracesDigest;
|
|
503
506
|
}
|
|
504
507
|
/**
|
|
505
508
|
* Factory function for creating JudgeAgent instances.
|
|
@@ -553,15 +556,54 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
553
556
|
*/
|
|
554
557
|
declare const judgeAgent: (cfg: JudgeAgentConfig) => JudgeAgent;
|
|
555
558
|
|
|
559
|
+
/**
|
|
560
|
+
* Transforms OpenTelemetry spans into a complete plain-text digest for judge evaluation.
|
|
561
|
+
* Deduplicates repeated string content to reduce token usage.
|
|
562
|
+
*/
|
|
563
|
+
declare class JudgeSpanDigestFormatter {
|
|
564
|
+
private readonly logger;
|
|
565
|
+
private readonly deduplicator;
|
|
566
|
+
/**
|
|
567
|
+
* Formats spans into a complete digest with full content and nesting.
|
|
568
|
+
* @param spans - All spans for a thread
|
|
569
|
+
* @returns Plain text digest
|
|
570
|
+
*/
|
|
571
|
+
format(spans: ReadableSpan[]): string;
|
|
572
|
+
private sortByStartTime;
|
|
573
|
+
private buildHierarchy;
|
|
574
|
+
private renderNode;
|
|
575
|
+
private getTreePrefix;
|
|
576
|
+
private getAttrIndent;
|
|
577
|
+
private cleanAttributes;
|
|
578
|
+
private formatValue;
|
|
579
|
+
private transformValue;
|
|
580
|
+
private transformString;
|
|
581
|
+
private looksLikeJson;
|
|
582
|
+
private hrTimeToMs;
|
|
583
|
+
private calculateSpanDuration;
|
|
584
|
+
private calculateTotalDuration;
|
|
585
|
+
private formatDuration;
|
|
586
|
+
private formatTimestamp;
|
|
587
|
+
private getStatusIndicator;
|
|
588
|
+
private collectErrors;
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Singleton instance for convenience.
|
|
592
|
+
*/
|
|
593
|
+
declare const judgeSpanDigestFormatter: JudgeSpanDigestFormatter;
|
|
594
|
+
|
|
556
595
|
declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
557
596
|
private readonly cfg?;
|
|
558
597
|
private logger;
|
|
598
|
+
/**
|
|
599
|
+
* LLM invocation function. Can be overridden to customize LLM behavior.
|
|
600
|
+
*/
|
|
601
|
+
invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
|
|
559
602
|
constructor(cfg?: TestingAgentConfig | undefined);
|
|
560
603
|
call: (input: AgentInput) => Promise<{
|
|
561
604
|
role: "user";
|
|
562
605
|
content: string;
|
|
563
606
|
}>;
|
|
564
|
-
private generateText;
|
|
565
607
|
}
|
|
566
608
|
/**
|
|
567
609
|
* Agent that simulates realistic user behavior in scenario conversations.
|
|
@@ -654,14 +696,169 @@ declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
|
654
696
|
*/
|
|
655
697
|
declare const userSimulatorAgent: (config?: TestingAgentConfig) => UserSimulatorAgent;
|
|
656
698
|
|
|
699
|
+
/**
|
|
700
|
+
* Event emitted when an audio response is completed
|
|
701
|
+
*/
|
|
702
|
+
interface AudioResponseEvent {
|
|
703
|
+
transcript: string;
|
|
704
|
+
audio: string;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
/**
|
|
708
|
+
* Realtime Agent Adapter for Scenario Testing
|
|
709
|
+
*
|
|
710
|
+
* Adapts a connected RealtimeSession to the Scenario framework interface.
|
|
711
|
+
* The session must be created and connected before passing to this adapter.
|
|
712
|
+
*
|
|
713
|
+
* This ensures we test the REAL agent, not a mock, using the same session
|
|
714
|
+
* creation pattern as the browser client.
|
|
715
|
+
*/
|
|
716
|
+
|
|
717
|
+
/**
|
|
718
|
+
* Configuration for RealtimeAgentAdapter
|
|
719
|
+
*/
|
|
720
|
+
interface RealtimeAgentAdapterConfig {
|
|
721
|
+
/**
|
|
722
|
+
* The role of the agent
|
|
723
|
+
*/
|
|
724
|
+
role: AgentRole;
|
|
725
|
+
/**
|
|
726
|
+
* A connected RealtimeSession instance
|
|
727
|
+
*
|
|
728
|
+
* The session should be created using your agent's session creator function
|
|
729
|
+
* and connected before passing to this adapter.
|
|
730
|
+
*
|
|
731
|
+
* @example
|
|
732
|
+
* ```typescript
|
|
733
|
+
* const session = createVegetarianRecipeSession();
|
|
734
|
+
* await session.connect({ apiKey: process.env.OPENAI_API_KEY });
|
|
735
|
+
* const adapter = new RealtimeAgentAdapter({
|
|
736
|
+
* session,
|
|
737
|
+
* role: AgentRole.AGENT,
|
|
738
|
+
* agentName: "Vegetarian Recipe Assistant"
|
|
739
|
+
* });
|
|
740
|
+
* ```
|
|
741
|
+
*/
|
|
742
|
+
session: RealtimeSession;
|
|
743
|
+
/**
|
|
744
|
+
* Name of the agent (for logging/identification)
|
|
745
|
+
*/
|
|
746
|
+
agentName: string;
|
|
747
|
+
/**
|
|
748
|
+
* Timeout for waiting for agent response (ms)
|
|
749
|
+
* @default 30000
|
|
750
|
+
*/
|
|
751
|
+
responseTimeout?: number;
|
|
752
|
+
}
|
|
753
|
+
/**
|
|
754
|
+
* Adapter that connects Scenario testing framework to OpenAI Realtime API
|
|
755
|
+
*
|
|
756
|
+
* This adapter wraps a connected RealtimeSession to provide the Scenario
|
|
757
|
+
* framework interface. The session must be created and connected externally,
|
|
758
|
+
* ensuring the same session creation pattern is used in both browser and tests.
|
|
759
|
+
*
|
|
760
|
+
* @example
|
|
761
|
+
* ```typescript
|
|
762
|
+
* // In beforeAll
|
|
763
|
+
* const session = createVegetarianRecipeSession();
|
|
764
|
+
* await session.connect({ apiKey: process.env.OPENAI_API_KEY });
|
|
765
|
+
* const adapter = new RealtimeAgentAdapter({
|
|
766
|
+
* session,
|
|
767
|
+
* role: AgentRole.AGENT
|
|
768
|
+
* });
|
|
769
|
+
*
|
|
770
|
+
* // In test
|
|
771
|
+
* await scenario.run({
|
|
772
|
+
* agents: [adapter, scenario.userSimulatorAgent()],
|
|
773
|
+
* script: [scenario.user("quick recipe"), scenario.agent()]
|
|
774
|
+
* });
|
|
775
|
+
*
|
|
776
|
+
* // In afterAll
|
|
777
|
+
* session.close();
|
|
778
|
+
* ```
|
|
779
|
+
*/
|
|
780
|
+
declare class RealtimeAgentAdapter extends AgentAdapter {
|
|
781
|
+
private config;
|
|
782
|
+
role: AgentRole;
|
|
783
|
+
name: string;
|
|
784
|
+
private session;
|
|
785
|
+
private eventHandler;
|
|
786
|
+
private messageProcessor;
|
|
787
|
+
private responseFormatter;
|
|
788
|
+
private audioEvents;
|
|
789
|
+
/**
|
|
790
|
+
* Creates a new RealtimeAgentAdapter instance
|
|
791
|
+
*
|
|
792
|
+
* The session can be either connected or unconnected.
|
|
793
|
+
* If unconnected, call connect() with an API key before use.
|
|
794
|
+
*
|
|
795
|
+
* @param config - Configuration for the realtime agent adapter
|
|
796
|
+
*/
|
|
797
|
+
constructor(config: RealtimeAgentAdapterConfig);
|
|
798
|
+
/**
|
|
799
|
+
* Get the connect method from the session
|
|
800
|
+
*/
|
|
801
|
+
connect(params?: Parameters<RealtimeSession["connect"]>[0] | undefined): Promise<void>;
|
|
802
|
+
/**
|
|
803
|
+
* Closes the session connection
|
|
804
|
+
*/
|
|
805
|
+
disconnect(): Promise<void>;
|
|
806
|
+
/**
|
|
807
|
+
* Process input and generate response (implements AgentAdapter interface)
|
|
808
|
+
*
|
|
809
|
+
* This is called by Scenario framework for each agent turn.
|
|
810
|
+
* Handles both text and audio input, returns audio message with transcript.
|
|
811
|
+
*
|
|
812
|
+
* @param input - Scenario agent input with message history
|
|
813
|
+
* @returns Agent response as audio message or text
|
|
814
|
+
*/
|
|
815
|
+
call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
816
|
+
/**
|
|
817
|
+
* Handles the initial response when no user message exists
|
|
818
|
+
*/
|
|
819
|
+
private handleInitialResponse;
|
|
820
|
+
/**
|
|
821
|
+
* Handles audio input from the user
|
|
822
|
+
*/
|
|
823
|
+
private handleAudioInput;
|
|
824
|
+
/**
|
|
825
|
+
* Handles text input from the user
|
|
826
|
+
*/
|
|
827
|
+
private handleTextInput;
|
|
828
|
+
/**
|
|
829
|
+
* Subscribe to audio response events
|
|
830
|
+
*
|
|
831
|
+
* @param callback - Function called when an audio response completes
|
|
832
|
+
*/
|
|
833
|
+
onAudioResponse(callback: (event: AudioResponseEvent) => void): void;
|
|
834
|
+
/**
|
|
835
|
+
* Remove audio response listener
|
|
836
|
+
*
|
|
837
|
+
* @param callback - The callback function to remove
|
|
838
|
+
*/
|
|
839
|
+
offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
type agents_AudioResponseEvent = AudioResponseEvent;
|
|
657
843
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
844
|
+
type agents_InvokeLLMParams = InvokeLLMParams;
|
|
845
|
+
type agents_InvokeLLMResult = InvokeLLMResult;
|
|
658
846
|
type agents_JudgeAgentConfig = JudgeAgentConfig;
|
|
847
|
+
type agents_JudgeResult = JudgeResult;
|
|
848
|
+
type agents_JudgeSpanCollector = JudgeSpanCollector;
|
|
849
|
+
declare const agents_JudgeSpanCollector: typeof JudgeSpanCollector;
|
|
850
|
+
type agents_JudgeSpanDigestFormatter = JudgeSpanDigestFormatter;
|
|
851
|
+
declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
|
|
852
|
+
type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
|
|
853
|
+
declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
|
|
854
|
+
type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
|
|
659
855
|
type agents_TestingAgentConfig = TestingAgentConfig;
|
|
660
|
-
type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
|
|
661
856
|
declare const agents_judgeAgent: typeof judgeAgent;
|
|
857
|
+
declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
|
|
858
|
+
declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
|
|
662
859
|
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
663
860
|
declare namespace agents {
|
|
664
|
-
export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type
|
|
861
|
+
export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
|
|
665
862
|
}
|
|
666
863
|
|
|
667
864
|
/**
|
|
@@ -690,26 +887,26 @@ declare enum ScenarioRunStatus {
|
|
|
690
887
|
* Discriminated union of all possible scenario event types.
|
|
691
888
|
* Enables type-safe handling of different event types based on the 'type' field.
|
|
692
889
|
*/
|
|
693
|
-
declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
|
|
694
|
-
timestamp: z.ZodNumber;
|
|
695
|
-
rawEvent: z.ZodOptional<z.ZodAny>;
|
|
696
|
-
batchRunId: z.ZodString;
|
|
697
|
-
scenarioId: z.ZodString;
|
|
698
|
-
scenarioRunId: z.ZodString;
|
|
699
|
-
scenarioSetId: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
890
|
+
declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObject<{
|
|
891
|
+
timestamp: z$1.ZodNumber;
|
|
892
|
+
rawEvent: z$1.ZodOptional<z$1.ZodAny>;
|
|
893
|
+
batchRunId: z$1.ZodString;
|
|
894
|
+
scenarioId: z$1.ZodString;
|
|
895
|
+
scenarioRunId: z$1.ZodString;
|
|
896
|
+
scenarioSetId: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodString>>;
|
|
700
897
|
} & {
|
|
701
|
-
type: z.ZodLiteral<ScenarioEventType.RUN_STARTED>;
|
|
702
|
-
metadata: z.ZodObject<{
|
|
703
|
-
name: z.ZodOptional<z.ZodString>;
|
|
704
|
-
description: z.ZodOptional<z.ZodString>;
|
|
705
|
-
}, "strip", z.ZodTypeAny, {
|
|
706
|
-
name?: string | undefined;
|
|
898
|
+
type: z$1.ZodLiteral<ScenarioEventType.RUN_STARTED>;
|
|
899
|
+
metadata: z$1.ZodObject<{
|
|
900
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
901
|
+
description: z$1.ZodOptional<z$1.ZodString>;
|
|
902
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
707
903
|
description?: string | undefined;
|
|
708
|
-
}, {
|
|
709
904
|
name?: string | undefined;
|
|
905
|
+
}, {
|
|
710
906
|
description?: string | undefined;
|
|
907
|
+
name?: string | undefined;
|
|
711
908
|
}>;
|
|
712
|
-
}, "strip", z.ZodTypeAny, {
|
|
909
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
713
910
|
type: ScenarioEventType.RUN_STARTED;
|
|
714
911
|
timestamp: number;
|
|
715
912
|
batchRunId: string;
|
|
@@ -717,8 +914,8 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
717
914
|
scenarioRunId: string;
|
|
718
915
|
scenarioSetId: string;
|
|
719
916
|
metadata: {
|
|
720
|
-
name?: string | undefined;
|
|
721
917
|
description?: string | undefined;
|
|
918
|
+
name?: string | undefined;
|
|
722
919
|
};
|
|
723
920
|
rawEvent?: any;
|
|
724
921
|
}, {
|
|
@@ -728,41 +925,41 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
728
925
|
scenarioId: string;
|
|
729
926
|
scenarioRunId: string;
|
|
730
927
|
metadata: {
|
|
731
|
-
name?: string | undefined;
|
|
732
928
|
description?: string | undefined;
|
|
929
|
+
name?: string | undefined;
|
|
733
930
|
};
|
|
734
931
|
rawEvent?: any;
|
|
735
932
|
scenarioSetId?: string | undefined;
|
|
736
|
-
}>, z.ZodObject<{
|
|
737
|
-
timestamp: z.ZodNumber;
|
|
738
|
-
rawEvent: z.ZodOptional<z.ZodAny>;
|
|
739
|
-
batchRunId: z.ZodString;
|
|
740
|
-
scenarioId: z.ZodString;
|
|
741
|
-
scenarioRunId: z.ZodString;
|
|
742
|
-
scenarioSetId: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
933
|
+
}>, z$1.ZodObject<{
|
|
934
|
+
timestamp: z$1.ZodNumber;
|
|
935
|
+
rawEvent: z$1.ZodOptional<z$1.ZodAny>;
|
|
936
|
+
batchRunId: z$1.ZodString;
|
|
937
|
+
scenarioId: z$1.ZodString;
|
|
938
|
+
scenarioRunId: z$1.ZodString;
|
|
939
|
+
scenarioSetId: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodString>>;
|
|
743
940
|
} & {
|
|
744
|
-
type: z.ZodLiteral<ScenarioEventType.RUN_FINISHED>;
|
|
745
|
-
status: z.ZodNativeEnum<typeof ScenarioRunStatus>;
|
|
746
|
-
results: z.ZodNullable<z.ZodOptional<z.ZodObject<{
|
|
747
|
-
verdict: z.ZodNativeEnum<typeof Verdict>;
|
|
748
|
-
reasoning: z.ZodOptional<z.ZodString>;
|
|
749
|
-
metCriteria: z.ZodArray<z.ZodString, "many">;
|
|
750
|
-
unmetCriteria: z.ZodArray<z.ZodString, "many">;
|
|
751
|
-
error: z.ZodOptional<z.ZodString>;
|
|
752
|
-
}, "strip", z.ZodTypeAny, {
|
|
941
|
+
type: z$1.ZodLiteral<ScenarioEventType.RUN_FINISHED>;
|
|
942
|
+
status: z$1.ZodNativeEnum<typeof ScenarioRunStatus>;
|
|
943
|
+
results: z$1.ZodNullable<z$1.ZodOptional<z$1.ZodObject<{
|
|
944
|
+
verdict: z$1.ZodNativeEnum<typeof Verdict>;
|
|
945
|
+
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
946
|
+
metCriteria: z$1.ZodArray<z$1.ZodString, "many">;
|
|
947
|
+
unmetCriteria: z$1.ZodArray<z$1.ZodString, "many">;
|
|
948
|
+
error: z$1.ZodOptional<z$1.ZodString>;
|
|
949
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
753
950
|
verdict: Verdict;
|
|
754
951
|
metCriteria: string[];
|
|
755
952
|
unmetCriteria: string[];
|
|
756
|
-
reasoning?: string | undefined;
|
|
757
953
|
error?: string | undefined;
|
|
954
|
+
reasoning?: string | undefined;
|
|
758
955
|
}, {
|
|
759
956
|
verdict: Verdict;
|
|
760
957
|
metCriteria: string[];
|
|
761
958
|
unmetCriteria: string[];
|
|
762
|
-
reasoning?: string | undefined;
|
|
763
959
|
error?: string | undefined;
|
|
960
|
+
reasoning?: string | undefined;
|
|
764
961
|
}>>>;
|
|
765
|
-
}, "strip", z.ZodTypeAny, {
|
|
962
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
766
963
|
type: ScenarioEventType.RUN_FINISHED;
|
|
767
964
|
status: ScenarioRunStatus;
|
|
768
965
|
timestamp: number;
|
|
@@ -775,8 +972,8 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
775
972
|
verdict: Verdict;
|
|
776
973
|
metCriteria: string[];
|
|
777
974
|
unmetCriteria: string[];
|
|
778
|
-
reasoning?: string | undefined;
|
|
779
975
|
error?: string | undefined;
|
|
976
|
+
reasoning?: string | undefined;
|
|
780
977
|
} | null | undefined;
|
|
781
978
|
}, {
|
|
782
979
|
type: ScenarioEventType.RUN_FINISHED;
|
|
@@ -791,19 +988,19 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
791
988
|
verdict: Verdict;
|
|
792
989
|
metCriteria: string[];
|
|
793
990
|
unmetCriteria: string[];
|
|
794
|
-
reasoning?: string | undefined;
|
|
795
991
|
error?: string | undefined;
|
|
992
|
+
reasoning?: string | undefined;
|
|
796
993
|
} | null | undefined;
|
|
797
|
-
}>, z.ZodObject<{
|
|
798
|
-
messages: z.ZodArray<z.ZodDiscriminatedUnion<"role", [z.ZodObject<z.objectUtil.extendShape<{
|
|
799
|
-
id: z.ZodString;
|
|
800
|
-
role: z.ZodString;
|
|
801
|
-
content: z.ZodOptional<z.ZodString>;
|
|
802
|
-
name: z.ZodOptional<z.ZodString>;
|
|
994
|
+
}>, z$1.ZodObject<{
|
|
995
|
+
messages: z$1.ZodArray<z$1.ZodDiscriminatedUnion<"role", [z$1.ZodObject<z$1.objectUtil.extendShape<{
|
|
996
|
+
id: z$1.ZodString;
|
|
997
|
+
role: z$1.ZodString;
|
|
998
|
+
content: z$1.ZodOptional<z$1.ZodString>;
|
|
999
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
803
1000
|
}, {
|
|
804
|
-
role: z.ZodLiteral<"developer">;
|
|
805
|
-
content: z.ZodString;
|
|
806
|
-
}>, "strip", z.ZodTypeAny, {
|
|
1001
|
+
role: z$1.ZodLiteral<"developer">;
|
|
1002
|
+
content: z$1.ZodString;
|
|
1003
|
+
}>, "strip", z$1.ZodTypeAny, {
|
|
807
1004
|
id: string;
|
|
808
1005
|
role: "developer";
|
|
809
1006
|
content: string;
|
|
@@ -813,15 +1010,15 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
813
1010
|
role: "developer";
|
|
814
1011
|
content: string;
|
|
815
1012
|
name?: string | undefined;
|
|
816
|
-
}>, z.ZodObject<z.objectUtil.extendShape<{
|
|
817
|
-
id: z.ZodString;
|
|
818
|
-
role: z.ZodString;
|
|
819
|
-
content: z.ZodOptional<z.ZodString>;
|
|
820
|
-
name: z.ZodOptional<z.ZodString>;
|
|
1013
|
+
}>, z$1.ZodObject<z$1.objectUtil.extendShape<{
|
|
1014
|
+
id: z$1.ZodString;
|
|
1015
|
+
role: z$1.ZodString;
|
|
1016
|
+
content: z$1.ZodOptional<z$1.ZodString>;
|
|
1017
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
821
1018
|
}, {
|
|
822
|
-
role: z.ZodLiteral<"system">;
|
|
823
|
-
content: z.ZodString;
|
|
824
|
-
}>, "strip", z.ZodTypeAny, {
|
|
1019
|
+
role: z$1.ZodLiteral<"system">;
|
|
1020
|
+
content: z$1.ZodString;
|
|
1021
|
+
}>, "strip", z$1.ZodTypeAny, {
|
|
825
1022
|
id: string;
|
|
826
1023
|
role: "system";
|
|
827
1024
|
content: string;
|
|
@@ -831,28 +1028,28 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
831
1028
|
role: "system";
|
|
832
1029
|
content: string;
|
|
833
1030
|
name?: string | undefined;
|
|
834
|
-
}>, z.ZodObject<z.objectUtil.extendShape<{
|
|
835
|
-
id: z.ZodString;
|
|
836
|
-
role: z.ZodString;
|
|
837
|
-
content: z.ZodOptional<z.ZodString>;
|
|
838
|
-
name: z.ZodOptional<z.ZodString>;
|
|
1031
|
+
}>, z$1.ZodObject<z$1.objectUtil.extendShape<{
|
|
1032
|
+
id: z$1.ZodString;
|
|
1033
|
+
role: z$1.ZodString;
|
|
1034
|
+
content: z$1.ZodOptional<z$1.ZodString>;
|
|
1035
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
839
1036
|
}, {
|
|
840
|
-
role: z.ZodLiteral<"assistant">;
|
|
841
|
-
content: z.ZodOptional<z.ZodString>;
|
|
842
|
-
toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
843
|
-
id: z.ZodString;
|
|
844
|
-
type: z.ZodLiteral<"function">;
|
|
845
|
-
function: z.ZodObject<{
|
|
846
|
-
name: z.ZodString;
|
|
847
|
-
arguments: z.ZodString;
|
|
848
|
-
}, "strip", z.ZodTypeAny, {
|
|
1037
|
+
role: z$1.ZodLiteral<"assistant">;
|
|
1038
|
+
content: z$1.ZodOptional<z$1.ZodString>;
|
|
1039
|
+
toolCalls: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1040
|
+
id: z$1.ZodString;
|
|
1041
|
+
type: z$1.ZodLiteral<"function">;
|
|
1042
|
+
function: z$1.ZodObject<{
|
|
1043
|
+
name: z$1.ZodString;
|
|
1044
|
+
arguments: z$1.ZodString;
|
|
1045
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
849
1046
|
name: string;
|
|
850
1047
|
arguments: string;
|
|
851
1048
|
}, {
|
|
852
1049
|
name: string;
|
|
853
1050
|
arguments: string;
|
|
854
1051
|
}>;
|
|
855
|
-
}, "strip", z.ZodTypeAny, {
|
|
1052
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
856
1053
|
function: {
|
|
857
1054
|
name: string;
|
|
858
1055
|
arguments: string;
|
|
@@ -867,7 +1064,7 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
867
1064
|
type: "function";
|
|
868
1065
|
id: string;
|
|
869
1066
|
}>, "many">>;
|
|
870
|
-
}>, "strip", z.ZodTypeAny, {
|
|
1067
|
+
}>, "strip", z$1.ZodTypeAny, {
|
|
871
1068
|
id: string;
|
|
872
1069
|
role: "assistant";
|
|
873
1070
|
name?: string | undefined;
|
|
@@ -893,15 +1090,15 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
893
1090
|
type: "function";
|
|
894
1091
|
id: string;
|
|
895
1092
|
}[] | undefined;
|
|
896
|
-
}>, z.ZodObject<z.objectUtil.extendShape<{
|
|
897
|
-
id: z.ZodString;
|
|
898
|
-
role: z.ZodString;
|
|
899
|
-
content: z.ZodOptional<z.ZodString>;
|
|
900
|
-
name: z.ZodOptional<z.ZodString>;
|
|
1093
|
+
}>, z$1.ZodObject<z$1.objectUtil.extendShape<{
|
|
1094
|
+
id: z$1.ZodString;
|
|
1095
|
+
role: z$1.ZodString;
|
|
1096
|
+
content: z$1.ZodOptional<z$1.ZodString>;
|
|
1097
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
901
1098
|
}, {
|
|
902
|
-
role: z.ZodLiteral<"user">;
|
|
903
|
-
content: z.ZodString;
|
|
904
|
-
}>, "strip", z.ZodTypeAny, {
|
|
1099
|
+
role: z$1.ZodLiteral<"user">;
|
|
1100
|
+
content: z$1.ZodString;
|
|
1101
|
+
}>, "strip", z$1.ZodTypeAny, {
|
|
905
1102
|
id: string;
|
|
906
1103
|
role: "user";
|
|
907
1104
|
content: string;
|
|
@@ -911,12 +1108,12 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
911
1108
|
role: "user";
|
|
912
1109
|
content: string;
|
|
913
1110
|
name?: string | undefined;
|
|
914
|
-
}>, z.ZodObject<{
|
|
915
|
-
id: z.ZodString;
|
|
916
|
-
content: z.ZodString;
|
|
917
|
-
role: z.ZodLiteral<"tool">;
|
|
918
|
-
toolCallId: z.ZodString;
|
|
919
|
-
}, "strip", z.ZodTypeAny, {
|
|
1111
|
+
}>, z$1.ZodObject<{
|
|
1112
|
+
id: z$1.ZodString;
|
|
1113
|
+
content: z$1.ZodString;
|
|
1114
|
+
role: z$1.ZodLiteral<"tool">;
|
|
1115
|
+
toolCallId: z$1.ZodString;
|
|
1116
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
920
1117
|
id: string;
|
|
921
1118
|
role: "tool";
|
|
922
1119
|
content: string;
|
|
@@ -928,14 +1125,14 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
928
1125
|
toolCallId: string;
|
|
929
1126
|
}>]>, "many">;
|
|
930
1127
|
} & {
|
|
931
|
-
timestamp: z.ZodNumber;
|
|
932
|
-
rawEvent: z.ZodOptional<z.ZodAny>;
|
|
933
|
-
batchRunId: z.ZodString;
|
|
934
|
-
scenarioId: z.ZodString;
|
|
935
|
-
scenarioRunId: z.ZodString;
|
|
936
|
-
scenarioSetId: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
937
|
-
type: z.ZodLiteral<ScenarioEventType.MESSAGE_SNAPSHOT>;
|
|
938
|
-
}, "strip", z.ZodTypeAny, {
|
|
1128
|
+
timestamp: z$1.ZodNumber;
|
|
1129
|
+
rawEvent: z$1.ZodOptional<z$1.ZodAny>;
|
|
1130
|
+
batchRunId: z$1.ZodString;
|
|
1131
|
+
scenarioId: z$1.ZodString;
|
|
1132
|
+
scenarioRunId: z$1.ZodString;
|
|
1133
|
+
scenarioSetId: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodString>>;
|
|
1134
|
+
type: z$1.ZodLiteral<ScenarioEventType.MESSAGE_SNAPSHOT>;
|
|
1135
|
+
}, "strip", z$1.ZodTypeAny, {
|
|
939
1136
|
type: ScenarioEventType.MESSAGE_SNAPSHOT;
|
|
940
1137
|
messages: ({
|
|
941
1138
|
id: string;
|
|
@@ -1020,7 +1217,7 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
1020
1217
|
rawEvent?: any;
|
|
1021
1218
|
scenarioSetId?: string | undefined;
|
|
1022
1219
|
}>]>;
|
|
1023
|
-
type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
|
|
1220
|
+
type ScenarioEvent = z$1.infer<typeof scenarioEventSchema>;
|
|
1024
1221
|
|
|
1025
1222
|
/**
|
|
1026
1223
|
* Manages the execution of a single scenario test.
|
|
@@ -1107,8 +1304,12 @@ type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
|
|
|
1107
1304
|
* ```
|
|
1108
1305
|
*/
|
|
1109
1306
|
declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
1307
|
+
/** LangWatch tracer for scenario execution */
|
|
1308
|
+
private tracer;
|
|
1110
1309
|
/** The current state of the scenario execution */
|
|
1111
1310
|
private state;
|
|
1311
|
+
/** The final result of the scenario execution, set when a conclusion is reached */
|
|
1312
|
+
private _result?;
|
|
1112
1313
|
/** Logger for debugging and monitoring */
|
|
1113
1314
|
private logger;
|
|
1114
1315
|
/** Finalized configuration with all defaults applied */
|
|
@@ -1127,10 +1328,10 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1127
1328
|
* Key: agent index, Value: array of pending messages for that agent
|
|
1128
1329
|
*/
|
|
1129
1330
|
private pendingMessages;
|
|
1130
|
-
/** Intermediate result set by agents that make final decisions */
|
|
1131
|
-
private partialResult;
|
|
1132
1331
|
/** Accumulated execution time for each agent (for performance tracking) */
|
|
1133
1332
|
private agentTimes;
|
|
1333
|
+
/** Current turn span for trace context management */
|
|
1334
|
+
private currentTurnSpan?;
|
|
1134
1335
|
/** Timestamp when execution started (for total time calculation) */
|
|
1135
1336
|
private totalStartTime;
|
|
1136
1337
|
/** Event stream for monitoring scenario progress */
|
|
@@ -1155,9 +1356,9 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1155
1356
|
/**
|
|
1156
1357
|
* Gets the complete conversation history as an array of messages.
|
|
1157
1358
|
*
|
|
1158
|
-
* @returns Array of
|
|
1359
|
+
* @returns Array of ModelMessage objects representing the full conversation
|
|
1159
1360
|
*/
|
|
1160
|
-
get messages():
|
|
1361
|
+
get messages(): ModelMessage[];
|
|
1161
1362
|
/**
|
|
1162
1363
|
* Gets the unique identifier for the conversation thread.
|
|
1163
1364
|
* This ID is used to maintain conversation context across multiple runs.
|
|
@@ -1165,6 +1366,20 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1165
1366
|
* @returns The thread identifier string
|
|
1166
1367
|
*/
|
|
1167
1368
|
get threadId(): string;
|
|
1369
|
+
/**
|
|
1370
|
+
* Gets the result of the scenario execution if it has been set.
|
|
1371
|
+
*
|
|
1372
|
+
* @returns The scenario result or undefined if not yet set
|
|
1373
|
+
*/
|
|
1374
|
+
get result(): ScenarioResult | undefined;
|
|
1375
|
+
/**
|
|
1376
|
+
* Sets the result of the scenario execution.
|
|
1377
|
+
* This is called when the scenario reaches a conclusion (success or failure).
|
|
1378
|
+
* Automatically includes messages, totalTime, and agentTime from the current execution context.
|
|
1379
|
+
*
|
|
1380
|
+
* @param result - The final scenario result (without messages/timing, which will be added automatically)
|
|
1381
|
+
*/
|
|
1382
|
+
private setResult;
|
|
1168
1383
|
/**
|
|
1169
1384
|
* The total elapsed time for the scenario execution.
|
|
1170
1385
|
*/
|
|
@@ -1207,30 +1422,25 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1207
1422
|
* - Progress to the next turn if needed
|
|
1208
1423
|
* - Find the next agent that should act
|
|
1209
1424
|
* - Execute that agent's response
|
|
1210
|
-
* -
|
|
1425
|
+
* - Set the result if the scenario concludes
|
|
1211
1426
|
*
|
|
1212
1427
|
* Note: This method is primarily for debugging or custom execution flows. Most users
|
|
1213
1428
|
* will use `execute()` to run the entire scenario automatically.
|
|
1214
1429
|
*
|
|
1215
|
-
*
|
|
1216
|
-
* - Array of new messages added during the agent interaction, or
|
|
1217
|
-
* - A final ScenarioResult if the interaction concludes the scenario
|
|
1218
|
-
* @throws Error if no result is returned from the step
|
|
1430
|
+
* After calling this method, check `this.result` to see if the scenario has concluded.
|
|
1219
1431
|
*
|
|
1220
1432
|
* @example
|
|
1221
1433
|
* ```typescript
|
|
1222
1434
|
* const execution = new ScenarioExecution(config, script);
|
|
1223
1435
|
*
|
|
1224
1436
|
* // Execute one agent interaction at a time
|
|
1225
|
-
*
|
|
1226
|
-
* if (
|
|
1227
|
-
* console.log('
|
|
1228
|
-
* } else {
|
|
1229
|
-
* console.log('Scenario finished:', messages.success);
|
|
1437
|
+
* await execution.step();
|
|
1438
|
+
* if (execution.result) {
|
|
1439
|
+
* console.log('Scenario finished:', execution.result.success);
|
|
1230
1440
|
* }
|
|
1231
1441
|
* ```
|
|
1232
1442
|
*/
|
|
1233
|
-
step(): Promise<
|
|
1443
|
+
step(): Promise<void>;
|
|
1234
1444
|
private _step;
|
|
1235
1445
|
/**
|
|
1236
1446
|
* Calls a specific agent to generate a response or make a decision.
|
|
@@ -1249,15 +1459,12 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1249
1459
|
* After the agent responds:
|
|
1250
1460
|
* - Performance timing is recorded
|
|
1251
1461
|
* - Pending messages for this agent are cleared (they've been processed)
|
|
1252
|
-
* - If the agent returns a ScenarioResult, it's
|
|
1462
|
+
* - If the agent returns a ScenarioResult, it's set on this.result
|
|
1253
1463
|
* - Otherwise, the agent's messages are added to the conversation and broadcast
|
|
1254
1464
|
*
|
|
1255
1465
|
* @param idx - The index of the agent in the agents array
|
|
1256
1466
|
* @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
|
|
1257
1467
|
* @param judgmentRequest - Whether this is a judgment request (for judge agents)
|
|
1258
|
-
* @returns A promise that resolves with either:
|
|
1259
|
-
* - Array of messages if the agent generated a response, or
|
|
1260
|
-
* - ScenarioResult if the agent made a final decision
|
|
1261
1468
|
* @throws Error if the agent call fails
|
|
1262
1469
|
*/
|
|
1263
1470
|
private callAgent;
|
|
@@ -1270,7 +1477,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1270
1477
|
* - "assistant" messages are routed to AGENT role agents
|
|
1271
1478
|
* - Other message types are added directly to the conversation
|
|
1272
1479
|
*
|
|
1273
|
-
* @param message - The
|
|
1480
|
+
* @param message - The ModelMessage to add to the conversation
|
|
1274
1481
|
*
|
|
1275
1482
|
* @example
|
|
1276
1483
|
* ```typescript
|
|
@@ -1280,7 +1487,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1280
1487
|
* });
|
|
1281
1488
|
* ```
|
|
1282
1489
|
*/
|
|
1283
|
-
message(message:
|
|
1490
|
+
message(message: ModelMessage): Promise<void>;
|
|
1284
1491
|
/**
|
|
1285
1492
|
* Executes a user turn in the conversation.
|
|
1286
1493
|
*
|
|
@@ -1290,7 +1497,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1290
1497
|
*
|
|
1291
1498
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1292
1499
|
*
|
|
1293
|
-
* @param content - Optional content for the user's message. Can be a string or
|
|
1500
|
+
* @param content - Optional content for the user's message. Can be a string or ModelMessage.
|
|
1294
1501
|
* If not provided, the user simulator agent will generate the content.
|
|
1295
1502
|
*
|
|
1296
1503
|
* @example
|
|
@@ -1301,14 +1508,14 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1301
1508
|
* // Let user simulator generate content
|
|
1302
1509
|
* await execution.user();
|
|
1303
1510
|
*
|
|
1304
|
-
* // Use a
|
|
1511
|
+
* // Use a ModelMessage object
|
|
1305
1512
|
* await execution.user({
|
|
1306
1513
|
* role: "user",
|
|
1307
1514
|
* content: "Tell me a joke"
|
|
1308
1515
|
* });
|
|
1309
1516
|
* ```
|
|
1310
1517
|
*/
|
|
1311
|
-
user(content?: string |
|
|
1518
|
+
user(content?: string | ModelMessage): Promise<void>;
|
|
1312
1519
|
/**
|
|
1313
1520
|
* Executes an agent turn in the conversation.
|
|
1314
1521
|
*
|
|
@@ -1318,7 +1525,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1318
1525
|
*
|
|
1319
1526
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1320
1527
|
*
|
|
1321
|
-
* @param content - Optional content for the agent's response. Can be a string or
|
|
1528
|
+
* @param content - Optional content for the agent's response. Can be a string or ModelMessage.
|
|
1322
1529
|
* If not provided, the agent under test will generate the response.
|
|
1323
1530
|
*
|
|
1324
1531
|
* @example
|
|
@@ -1329,14 +1536,14 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1329
1536
|
* // Use provided content
|
|
1330
1537
|
* await execution.agent("The weather is sunny today!");
|
|
1331
1538
|
*
|
|
1332
|
-
* // Use a
|
|
1539
|
+
* // Use a ModelMessage object
|
|
1333
1540
|
* await execution.agent({
|
|
1334
1541
|
* role: "assistant",
|
|
1335
1542
|
* content: "I'm here to help you with weather information."
|
|
1336
1543
|
* });
|
|
1337
1544
|
* ```
|
|
1338
1545
|
*/
|
|
1339
|
-
agent(content?: string |
|
|
1546
|
+
agent(content?: string | ModelMessage): Promise<void>;
|
|
1340
1547
|
/**
|
|
1341
1548
|
* Invokes the judge agent to evaluate the current state of the conversation.
|
|
1342
1549
|
*
|
|
@@ -1364,7 +1571,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1364
1571
|
* const result = await execution.judge("Please consider the user's satisfaction level");
|
|
1365
1572
|
* ```
|
|
1366
1573
|
*/
|
|
1367
|
-
judge(content?: string |
|
|
1574
|
+
judge(content?: string | ModelMessage): Promise<ScenarioResult | null>;
|
|
1368
1575
|
/**
|
|
1369
1576
|
* Lets the scenario proceed automatically for a specified number of turns.
|
|
1370
1577
|
*
|
|
@@ -1472,49 +1679,6 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1472
1679
|
* ```
|
|
1473
1680
|
*/
|
|
1474
1681
|
addAgentTime(agentIdx: number, time: number): void;
|
|
1475
|
-
/**
|
|
1476
|
-
* Checks if a partial result has been set for the scenario.
|
|
1477
|
-
*
|
|
1478
|
-
* This method is used internally to determine if a scenario has already reached
|
|
1479
|
-
* a conclusion (success or failure) but hasn't been finalized yet. Partial results
|
|
1480
|
-
* are typically set by agents that make final decisions (like judge agents) and
|
|
1481
|
-
* are later finalized with the complete message history.
|
|
1482
|
-
*
|
|
1483
|
-
* @returns True if a partial result exists, false otherwise
|
|
1484
|
-
*
|
|
1485
|
-
* @example
|
|
1486
|
-
* ```typescript
|
|
1487
|
-
* // This is typically used internally by the execution engine
|
|
1488
|
-
* if (execution.hasResult()) {
|
|
1489
|
-
* console.log('Scenario has reached a conclusion');
|
|
1490
|
-
* }
|
|
1491
|
-
* ```
|
|
1492
|
-
*/
|
|
1493
|
-
hasResult(): boolean;
|
|
1494
|
-
/**
|
|
1495
|
-
* Sets a partial result for the scenario.
|
|
1496
|
-
*
|
|
1497
|
-
* This method is used internally to store intermediate results that may be
|
|
1498
|
-
* finalized later with the complete message history. Partial results are typically
|
|
1499
|
-
* created by agents that make final decisions (like judge agents) and contain
|
|
1500
|
-
* the success/failure status, reasoning, and criteria evaluation, but not the
|
|
1501
|
-
* complete message history.
|
|
1502
|
-
*
|
|
1503
|
-
* @param result - The partial result without the messages field. Should include
|
|
1504
|
-
* success status, reasoning, and criteria evaluation.
|
|
1505
|
-
*
|
|
1506
|
-
* @example
|
|
1507
|
-
* ```typescript
|
|
1508
|
-
* // This is typically called internally by agents that make final decisions
|
|
1509
|
-
* execution.setResult({
|
|
1510
|
-
* success: true,
|
|
1511
|
-
* reasoning: "Agent provided accurate weather information",
|
|
1512
|
-
* metCriteria: ["Provides accurate weather data"],
|
|
1513
|
-
* unmetCriteria: []
|
|
1514
|
-
* });
|
|
1515
|
-
* ```
|
|
1516
|
-
*/
|
|
1517
|
-
setResult(result: Omit<ScenarioResult, "messages">): void;
|
|
1518
1682
|
/**
|
|
1519
1683
|
* Internal method to handle script step calls to agents.
|
|
1520
1684
|
*
|
|
@@ -1527,7 +1691,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1527
1691
|
* - Progress to a new turn if no agent is available
|
|
1528
1692
|
* - Execute the agent with the provided content or let it generate content
|
|
1529
1693
|
* - Handle judgment requests for judge agents
|
|
1530
|
-
* -
|
|
1694
|
+
* - Set the result if the agent makes a decision
|
|
1531
1695
|
*
|
|
1532
1696
|
* @param role - The role of the agent to call (USER, AGENT, or JUDGE)
|
|
1533
1697
|
* @param content - Optional content to use instead of letting the agent generate it
|
|
@@ -1551,6 +1715,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1551
1715
|
* - Starts the first turn
|
|
1552
1716
|
* - Records the start time for performance tracking
|
|
1553
1717
|
* - Clears any pending messages
|
|
1718
|
+
* - Clears the result from any previous execution
|
|
1554
1719
|
*/
|
|
1555
1720
|
private reset;
|
|
1556
1721
|
private nextAgentForRole;
|
|
@@ -1575,7 +1740,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1575
1740
|
*
|
|
1576
1741
|
* This method is called when the scenario execution reaches the maximum number
|
|
1577
1742
|
* of turns without reaching a conclusion. It creates a failure result with
|
|
1578
|
-
* appropriate reasoning and includes performance metrics.
|
|
1743
|
+
* appropriate reasoning and includes performance metrics, then sets it on this.result.
|
|
1579
1744
|
*
|
|
1580
1745
|
* The result includes:
|
|
1581
1746
|
* - All messages from the conversation
|
|
@@ -1585,7 +1750,6 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1585
1750
|
* - Total execution time and agent response times
|
|
1586
1751
|
*
|
|
1587
1752
|
* @param errorMessage - Optional custom error message to use instead of the default
|
|
1588
|
-
* @returns A ScenarioResult indicating failure due to reaching max turns
|
|
1589
1753
|
*/
|
|
1590
1754
|
private reachedMaxTurns;
|
|
1591
1755
|
private getJudgeAgent;
|
|
@@ -1692,12 +1856,25 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1692
1856
|
* Adds a message to the conversation history.
|
|
1693
1857
|
*
|
|
1694
1858
|
* @param message - The message to add.
|
|
1859
|
+
* @param traceId - Optional trace ID to associate with the message.
|
|
1695
1860
|
*/
|
|
1696
|
-
addMessage(message: CoreMessage
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1861
|
+
addMessage(message: CoreMessage & {
|
|
1862
|
+
traceId?: string;
|
|
1863
|
+
}): void;
|
|
1864
|
+
lastMessage(): ai.ModelMessage & {
|
|
1865
|
+
id: string;
|
|
1866
|
+
traceId?: string;
|
|
1867
|
+
};
|
|
1868
|
+
lastUserMessage(): ai.UserModelMessage & {
|
|
1869
|
+
id: string;
|
|
1870
|
+
traceId?: string;
|
|
1871
|
+
};
|
|
1872
|
+
lastAgentMessage(): CoreAssistantMessage & {
|
|
1873
|
+
traceId?: string;
|
|
1874
|
+
};
|
|
1875
|
+
lastToolCall(toolName: string): CoreToolMessage & {
|
|
1876
|
+
traceId?: string;
|
|
1877
|
+
};
|
|
1701
1878
|
hasToolCall(toolName: string): boolean;
|
|
1702
1879
|
}
|
|
1703
1880
|
|
|
@@ -1871,4 +2048,4 @@ declare namespace script {
|
|
|
1871
2048
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
1872
2049
|
declare const scenario: ScenarioApi;
|
|
1873
2050
|
|
|
1874
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole,
|
|
2051
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|