@langwatch/scenario 0.2.9 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -15
- package/dist/{chunk-7H6OGEQ5.mjs → chunk-7HLDX5EL.mjs} +9 -14
- package/dist/{chunk-YPJZSK4J.mjs → chunk-OL4RFXV4.mjs} +23 -11
- package/dist/index.d.mts +559 -72
- package/dist/index.d.ts +559 -72
- package/dist/index.js +746 -212
- package/dist/index.mjs +711 -187
- package/dist/integrations/vitest/config.d.mts +37 -0
- package/dist/integrations/vitest/config.d.ts +37 -0
- package/dist/integrations/vitest/config.js +3 -276
- package/dist/integrations/vitest/config.mjs +3 -10
- package/dist/integrations/vitest/reporter.js +69 -17
- package/dist/integrations/vitest/reporter.mjs +182 -4
- package/dist/integrations/vitest/setup.js +24 -12
- package/dist/integrations/vitest/setup.mjs +2 -2
- package/package.json +21 -22
- package/dist/chunk-K7KLHTDI.mjs +0 -146
package/dist/index.d.mts
CHANGED
|
@@ -72,7 +72,6 @@ type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
|
|
|
72
72
|
*/
|
|
73
73
|
declare abstract class AgentAdapter {
|
|
74
74
|
role: AgentRole;
|
|
75
|
-
constructor(input: AgentInput);
|
|
76
75
|
/**
|
|
77
76
|
* Process the input and generate a response.
|
|
78
77
|
*
|
|
@@ -91,7 +90,6 @@ declare abstract class AgentAdapter {
|
|
|
91
90
|
*/
|
|
92
91
|
declare abstract class UserSimulatorAgentAdapter implements AgentAdapter {
|
|
93
92
|
role: AgentRole;
|
|
94
|
-
constructor(input: AgentInput);
|
|
95
93
|
/**
|
|
96
94
|
* Process the input and generate a user message.
|
|
97
95
|
*
|
|
@@ -110,7 +108,6 @@ declare abstract class JudgeAgentAdapter implements AgentAdapter {
|
|
|
110
108
|
* The criteria the judge will use to evaluate the conversation.
|
|
111
109
|
*/
|
|
112
110
|
abstract criteria: string[];
|
|
113
|
-
constructor(input: AgentInput);
|
|
114
111
|
/**
|
|
115
112
|
* Process the input and evaluate the conversation.
|
|
116
113
|
*
|
|
@@ -480,6 +477,32 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
480
477
|
/**
|
|
481
478
|
* Agent that evaluates conversations against success criteria.
|
|
482
479
|
*
|
|
480
|
+
* This is the default judge agent that is used if no judge agent is provided.
|
|
481
|
+
* It is a simple agent that uses function calling to make structured decisions
|
|
482
|
+
* and provides detailed reasoning for its verdicts.
|
|
483
|
+
*
|
|
484
|
+
* @param cfg {JudgeAgentConfig} Configuration for the judge agent.
|
|
485
|
+
*/
|
|
486
|
+
declare class JudgeAgent extends JudgeAgentAdapter {
|
|
487
|
+
private readonly cfg;
|
|
488
|
+
private logger;
|
|
489
|
+
role: AgentRole;
|
|
490
|
+
criteria: string[];
|
|
491
|
+
constructor(cfg: JudgeAgentConfig);
|
|
492
|
+
call(input: AgentInput): Promise<never[] | {
|
|
493
|
+
success: boolean;
|
|
494
|
+
messages: CoreMessage[];
|
|
495
|
+
reasoning: string;
|
|
496
|
+
metCriteria: string[];
|
|
497
|
+
unmetCriteria: string[];
|
|
498
|
+
}>;
|
|
499
|
+
private generateText;
|
|
500
|
+
}
|
|
501
|
+
/**
|
|
502
|
+
* Factory function for creating JudgeAgent instances.
|
|
503
|
+
*
|
|
504
|
+
* JudgeAgent evaluates conversations against success criteria.
|
|
505
|
+
*
|
|
483
506
|
* The JudgeAgent watches conversations in real-time and makes decisions about
|
|
484
507
|
* whether the agent under test is meeting the specified criteria. It can either
|
|
485
508
|
* allow the conversation to continue or end it with a success/failure verdict.
|
|
@@ -525,18 +548,18 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
525
548
|
* main();
|
|
526
549
|
* ```
|
|
527
550
|
*/
|
|
528
|
-
declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
529
|
-
role: AgentRole.JUDGE;
|
|
530
|
-
criteria: string[];
|
|
531
|
-
call: (input: AgentInput) => Promise<never[] | {
|
|
532
|
-
success: boolean;
|
|
533
|
-
messages: CoreMessage[];
|
|
534
|
-
reasoning: string;
|
|
535
|
-
metCriteria: string[];
|
|
536
|
-
unmetCriteria: string[];
|
|
537
|
-
}>;
|
|
538
|
-
};
|
|
551
|
+
declare const judgeAgent: (cfg: JudgeAgentConfig) => JudgeAgent;
|
|
539
552
|
|
|
553
|
+
declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
554
|
+
private readonly cfg?;
|
|
555
|
+
private logger;
|
|
556
|
+
constructor(cfg?: TestingAgentConfig | undefined);
|
|
557
|
+
call: (input: AgentInput) => Promise<{
|
|
558
|
+
role: "user";
|
|
559
|
+
content: string;
|
|
560
|
+
}>;
|
|
561
|
+
private generateText;
|
|
562
|
+
}
|
|
540
563
|
/**
|
|
541
564
|
* Agent that simulates realistic user behavior in scenario conversations.
|
|
542
565
|
*
|
|
@@ -623,16 +646,10 @@ declare const judgeAgent: (cfg: JudgeAgentConfig) => {
|
|
|
623
646
|
* main();
|
|
624
647
|
* ```
|
|
625
648
|
*
|
|
626
|
-
*
|
|
649
|
+
* **Implementation Notes:**
|
|
627
650
|
* - Uses role reversal internally to work around LLM biases toward assistant roles
|
|
628
651
|
*/
|
|
629
|
-
declare const userSimulatorAgent: (config?: TestingAgentConfig) => {
|
|
630
|
-
role: AgentRole.USER;
|
|
631
|
-
call: (input: AgentInput) => Promise<{
|
|
632
|
-
role: "user";
|
|
633
|
-
content: string;
|
|
634
|
-
}>;
|
|
635
|
-
};
|
|
652
|
+
declare const userSimulatorAgent: (config?: TestingAgentConfig) => UserSimulatorAgent;
|
|
636
653
|
|
|
637
654
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
638
655
|
type agents_JudgeAgentConfig = JudgeAgentConfig;
|
|
@@ -1003,11 +1020,60 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
|
|
|
1003
1020
|
type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
|
|
1004
1021
|
|
|
1005
1022
|
/**
|
|
1006
|
-
* Manages the execution of a single scenario.
|
|
1023
|
+
* Manages the execution of a single scenario test.
|
|
1024
|
+
*
|
|
1025
|
+
* This class orchestrates the interaction between agents (user simulator, agent under test,
|
|
1026
|
+
* and judge), executes the test script step-by-step, and manages the scenario's state
|
|
1027
|
+
* throughout execution. It also emits events that can be subscribed to for real-time
|
|
1028
|
+
* monitoring of the scenario's progress.
|
|
1029
|
+
*
|
|
1030
|
+
* ## Execution Flow Overview
|
|
1031
|
+
*
|
|
1032
|
+
* The execution follows a turn-based system where agents take turns responding. The key
|
|
1033
|
+
* concepts are:
|
|
1034
|
+
* - **Script Steps**: Functions in the scenario script like `user()`, `agent()`, `proceed()`, etc.
|
|
1035
|
+
* - **Agent Interactions**: Individual agent responses that occur when an agent takes their turn
|
|
1036
|
+
* - **Turns**: Groups of agent interactions that happen in sequence
|
|
1037
|
+
*
|
|
1038
|
+
* ## Message Broadcasting System
|
|
1039
|
+
*
|
|
1040
|
+
* The class implements a sophisticated message broadcasting system that ensures all agents
|
|
1041
|
+
* can "hear" each other's messages:
|
|
1007
1042
|
*
|
|
1008
|
-
*
|
|
1009
|
-
*
|
|
1010
|
-
*
|
|
1043
|
+
* 1. **Message Creation**: When an agent sends a message, it's added to the conversation history
|
|
1044
|
+
* 2. **Broadcasting**: The message is immediately broadcast to all other agents via `broadcastMessage()`
|
|
1045
|
+
* 3. **Queue Management**: Each agent has a pending message queue (`pendingMessages`) that stores
|
|
1046
|
+
* messages from other agents
|
|
1047
|
+
* 4. **Agent Input**: When an agent is called, it receives both the full conversation history
|
|
1048
|
+
* and any new pending messages that have been broadcast to it
|
|
1049
|
+
* 5. **Queue Clearing**: After an agent processes its pending messages, its queue is cleared
|
|
1050
|
+
*
|
|
1051
|
+
* This creates a realistic conversation environment where agents can respond contextually
|
|
1052
|
+
* to the full conversation history and any new messages from other agents.
|
|
1053
|
+
*
|
|
1054
|
+
* ## Example Message Flow
|
|
1055
|
+
*
|
|
1056
|
+
* ```
|
|
1057
|
+
* Turn 1:
|
|
1058
|
+
* 1. User Agent sends: "Hello"
|
|
1059
|
+
* - Added to conversation history
|
|
1060
|
+
* - Broadcast to Agent and Judge (pendingMessages[1] = ["Hello"], pendingMessages[2] = ["Hello"])
|
|
1061
|
+
*
|
|
1062
|
+
* 2. Agent is called:
|
|
1063
|
+
* - Receives: full conversation + pendingMessages[1] = ["Hello"]
|
|
1064
|
+
* - Sends: "Hi there! How can I help you?"
|
|
1065
|
+
* - Added to conversation history
|
|
1066
|
+
* - Broadcast to User and Judge (pendingMessages[0] = ["Hi there!..."], pendingMessages[2] = ["Hello", "Hi there!..."])
|
|
1067
|
+
* - pendingMessages[1] is cleared
|
|
1068
|
+
*
|
|
1069
|
+
* 3. Judge is called:
|
|
1070
|
+
* - Receives: full conversation + pendingMessages[2] = ["Hello", "Hi there!..."]
|
|
1071
|
+
* - Evaluates and decides to continue
|
|
1072
|
+
* - pendingMessages[2] is cleared
|
|
1073
|
+
* ```
|
|
1074
|
+
*
|
|
1075
|
+
* Each script step can trigger one or more agent interactions depending on the step type.
|
|
1076
|
+
* For example, a `proceed(5)` step might trigger 10 agent interactions across 5 turns.
|
|
1011
1077
|
*
|
|
1012
1078
|
* Note: This is an internal class. Most users will interact with the higher-level
|
|
1013
1079
|
* `scenario.run()` function instead of instantiating this class directly.
|
|
@@ -1027,9 +1093,10 @@ type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
|
|
|
1027
1093
|
* }),
|
|
1028
1094
|
* ],
|
|
1029
1095
|
* script: [
|
|
1030
|
-
* scenario.user("Hello"),
|
|
1031
|
-
* scenario.agent(),
|
|
1032
|
-
* scenario.
|
|
1096
|
+
* scenario.user("Hello"), // Script step 1: triggers 1 agent interaction
|
|
1097
|
+
* scenario.agent(), // Script step 2: triggers 1 agent interaction
|
|
1098
|
+
* scenario.proceed(3), // Script step 3: triggers multiple agent interactions
|
|
1099
|
+
* scenario.judge(), // Script step 4: triggers 1 agent interaction
|
|
1033
1100
|
* ]
|
|
1034
1101
|
* });
|
|
1035
1102
|
*
|
|
@@ -1037,34 +1104,62 @@ type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
|
|
|
1037
1104
|
* ```
|
|
1038
1105
|
*/
|
|
1039
1106
|
declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
1107
|
+
/** The current state of the scenario execution */
|
|
1040
1108
|
private state;
|
|
1041
|
-
|
|
1109
|
+
/** Logger for debugging and monitoring */
|
|
1042
1110
|
private logger;
|
|
1111
|
+
/** Finalized configuration with all defaults applied */
|
|
1043
1112
|
private config;
|
|
1113
|
+
/** Array of all agents participating in the scenario */
|
|
1044
1114
|
private agents;
|
|
1115
|
+
/** Roles that still need to act in the current turn (USER, AGENT, JUDGE) */
|
|
1045
1116
|
private pendingRolesOnTurn;
|
|
1117
|
+
/** Agents that still need to act in the current turn */
|
|
1046
1118
|
private pendingAgentsOnTurn;
|
|
1119
|
+
/**
|
|
1120
|
+
* Message queues for each agent. When an agent sends a message, it gets
|
|
1121
|
+
* broadcast to all other agents' pending message queues. When an agent
|
|
1122
|
+
* is called, it receives these pending messages as part of its input.
|
|
1123
|
+
*
|
|
1124
|
+
* Key: agent index, Value: array of pending messages for that agent
|
|
1125
|
+
*/
|
|
1047
1126
|
private pendingMessages;
|
|
1127
|
+
/** Intermediate result set by agents that make final decisions */
|
|
1048
1128
|
private partialResult;
|
|
1129
|
+
/** Accumulated execution time for each agent (for performance tracking) */
|
|
1049
1130
|
private agentTimes;
|
|
1131
|
+
/** Timestamp when execution started (for total time calculation) */
|
|
1050
1132
|
private totalStartTime;
|
|
1133
|
+
/** Event stream for monitoring scenario progress */
|
|
1134
|
+
private eventSubject;
|
|
1051
1135
|
/**
|
|
1052
1136
|
* An observable stream of events that occur during the scenario execution.
|
|
1053
1137
|
* Subscribe to this to monitor the progress of the scenario in real-time.
|
|
1138
|
+
*
|
|
1139
|
+
* Events include:
|
|
1140
|
+
* - RUN_STARTED: When scenario execution begins
|
|
1141
|
+
* - MESSAGE_SNAPSHOT: After each message is added to the conversation
|
|
1142
|
+
* - RUN_FINISHED: When scenario execution completes (success/failure/error)
|
|
1054
1143
|
*/
|
|
1055
1144
|
readonly events$: Observable<ScenarioEvent>;
|
|
1056
1145
|
/**
|
|
1057
1146
|
* Creates a new ScenarioExecution instance.
|
|
1058
|
-
*
|
|
1059
|
-
* @param
|
|
1147
|
+
*
|
|
1148
|
+
* @param config - The scenario configuration containing agents, settings, and metadata
|
|
1149
|
+
* @param script - The ordered sequence of script steps that define the test flow
|
|
1060
1150
|
*/
|
|
1061
1151
|
constructor(config: ScenarioConfig, script: ScriptStep[]);
|
|
1062
1152
|
/**
|
|
1063
|
-
*
|
|
1153
|
+
* Gets the complete conversation history as an array of messages.
|
|
1154
|
+
*
|
|
1155
|
+
* @returns Array of CoreMessage objects representing the full conversation
|
|
1064
1156
|
*/
|
|
1065
1157
|
get messages(): CoreMessage[];
|
|
1066
1158
|
/**
|
|
1067
|
-
*
|
|
1159
|
+
* Gets the unique identifier for the conversation thread.
|
|
1160
|
+
* This ID is used to maintain conversation context across multiple runs.
|
|
1161
|
+
*
|
|
1162
|
+
* @returns The thread identifier string
|
|
1068
1163
|
*/
|
|
1069
1164
|
get threadId(): string;
|
|
1070
1165
|
/**
|
|
@@ -1073,85 +1168,422 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1073
1168
|
private get totalTime();
|
|
1074
1169
|
/**
|
|
1075
1170
|
* Executes the entire scenario from start to finish.
|
|
1076
|
-
*
|
|
1077
|
-
*
|
|
1078
|
-
*
|
|
1171
|
+
*
|
|
1172
|
+
* This method runs through all script steps sequentially until a final result
|
|
1173
|
+
* (success, failure, or error) is determined. Each script step can trigger one or
|
|
1174
|
+
* more agent interactions depending on the step type:
|
|
1175
|
+
* - `user()` and `agent()` steps typically trigger one agent interaction each
|
|
1176
|
+
* - `proceed()` steps can trigger multiple agent interactions across multiple turns
|
|
1177
|
+
* - `judge()` steps trigger the judge agent to evaluate the conversation
|
|
1178
|
+
* - `succeed()` and `fail()` steps immediately end the scenario
|
|
1179
|
+
*
|
|
1180
|
+
* The execution will stop early if:
|
|
1181
|
+
* - A script step returns a ScenarioResult
|
|
1182
|
+
* - The maximum number of turns is reached
|
|
1183
|
+
* - An error occurs during execution
|
|
1184
|
+
*
|
|
1185
|
+
* @returns A promise that resolves with the final result of the scenario
|
|
1186
|
+
* @throws Error if an unhandled exception occurs during execution
|
|
1187
|
+
*
|
|
1188
|
+
* @example
|
|
1189
|
+
* ```typescript
|
|
1190
|
+
* const execution = new ScenarioExecution(config, script);
|
|
1191
|
+
* const result = await execution.execute();
|
|
1192
|
+
* console.log(`Scenario ${result.success ? 'passed' : 'failed'}`);
|
|
1193
|
+
* ```
|
|
1079
1194
|
*/
|
|
1080
1195
|
execute(): Promise<ScenarioResult>;
|
|
1081
1196
|
/**
|
|
1082
|
-
* Executes a single
|
|
1083
|
-
*
|
|
1084
|
-
* for
|
|
1085
|
-
*
|
|
1197
|
+
* Executes a single agent interaction in the scenario.
|
|
1198
|
+
*
|
|
1199
|
+
* This method is for manual step-by-step execution of the scenario, where each call
|
|
1200
|
+
* represents one agent taking their turn. This is different from script steps (like
|
|
1201
|
+
* `user()`, `agent()`, `proceed()`, etc.) which are functions in the scenario script.
|
|
1202
|
+
*
|
|
1203
|
+
* Each call to this method will:
|
|
1204
|
+
* - Progress to the next turn if needed
|
|
1205
|
+
* - Find the next agent that should act
|
|
1206
|
+
* - Execute that agent's response
|
|
1207
|
+
* - Return either new messages or a final scenario result
|
|
1208
|
+
*
|
|
1209
|
+
* Note: This method is primarily for debugging or custom execution flows. Most users
|
|
1210
|
+
* will use `execute()` to run the entire scenario automatically.
|
|
1211
|
+
*
|
|
1212
|
+
* @returns A promise that resolves with either:
|
|
1213
|
+
* - Array of new messages added during the agent interaction, or
|
|
1214
|
+
* - A final ScenarioResult if the interaction concludes the scenario
|
|
1215
|
+
* @throws Error if no result is returned from the step
|
|
1216
|
+
*
|
|
1217
|
+
* @example
|
|
1218
|
+
* ```typescript
|
|
1219
|
+
* const execution = new ScenarioExecution(config, script);
|
|
1220
|
+
*
|
|
1221
|
+
* // Execute one agent interaction at a time
|
|
1222
|
+
* const messages = await execution.step();
|
|
1223
|
+
* if (Array.isArray(messages)) {
|
|
1224
|
+
* console.log('New messages:', messages);
|
|
1225
|
+
* } else {
|
|
1226
|
+
* console.log('Scenario finished:', messages.success);
|
|
1227
|
+
* }
|
|
1228
|
+
* ```
|
|
1086
1229
|
*/
|
|
1087
1230
|
step(): Promise<CoreMessage[] | ScenarioResult>;
|
|
1088
1231
|
private _step;
|
|
1232
|
+
/**
|
|
1233
|
+
* Calls a specific agent to generate a response or make a decision.
|
|
1234
|
+
*
|
|
1235
|
+
* This method is the core of agent interaction. It prepares the agent's input
|
|
1236
|
+
* by combining the conversation history with any pending messages that have been
|
|
1237
|
+
* broadcast to this agent, then calls the agent and processes its response.
|
|
1238
|
+
*
|
|
1239
|
+
* The agent input includes:
|
|
1240
|
+
* - Full conversation history (this.state.messages)
|
|
1241
|
+
* - New messages that have been broadcast to this agent (this.pendingMessages.get(idx))
|
|
1242
|
+
* - The role the agent is being asked to play
|
|
1243
|
+
* - Whether this is a judgment request (for judge agents)
|
|
1244
|
+
* - Current scenario state and configuration
|
|
1245
|
+
*
|
|
1246
|
+
* After the agent responds:
|
|
1247
|
+
* - Performance timing is recorded
|
|
1248
|
+
* - Pending messages for this agent are cleared (they've been processed)
|
|
1249
|
+
* - If the agent returns a ScenarioResult, it's returned immediately
|
|
1250
|
+
* - Otherwise, the agent's messages are added to the conversation and broadcast
|
|
1251
|
+
*
|
|
1252
|
+
* @param idx - The index of the agent in the agents array
|
|
1253
|
+
* @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
|
|
1254
|
+
* @param judgmentRequest - Whether this is a judgment request (for judge agents)
|
|
1255
|
+
* @returns A promise that resolves with either:
|
|
1256
|
+
* - Array of messages if the agent generated a response, or
|
|
1257
|
+
* - ScenarioResult if the agent made a final decision
|
|
1258
|
+
* @throws Error if the agent call fails
|
|
1259
|
+
*/
|
|
1089
1260
|
private callAgent;
|
|
1090
1261
|
/**
|
|
1091
1262
|
* Adds a message to the conversation history.
|
|
1092
|
-
*
|
|
1093
|
-
*
|
|
1263
|
+
*
|
|
1264
|
+
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1265
|
+
* It automatically routes the message to the appropriate agent based on the message role:
|
|
1266
|
+
* - "user" messages are routed to USER role agents
|
|
1267
|
+
* - "assistant" messages are routed to AGENT role agents
|
|
1268
|
+
* - Other message types are added directly to the conversation
|
|
1269
|
+
*
|
|
1270
|
+
* @param message - The CoreMessage to add to the conversation
|
|
1271
|
+
*
|
|
1272
|
+
* @example
|
|
1273
|
+
* ```typescript
|
|
1274
|
+
* await execution.message({
|
|
1275
|
+
* role: "user",
|
|
1276
|
+
* content: "Hello, how are you?"
|
|
1277
|
+
* });
|
|
1278
|
+
* ```
|
|
1094
1279
|
*/
|
|
1095
1280
|
message(message: CoreMessage): Promise<void>;
|
|
1096
1281
|
/**
|
|
1097
|
-
* Executes a user turn.
|
|
1098
|
-
*
|
|
1099
|
-
* If
|
|
1100
|
-
*
|
|
1101
|
-
*
|
|
1282
|
+
* Executes a user turn in the conversation.
|
|
1283
|
+
*
|
|
1284
|
+
* If content is provided, it's used directly as the user's message. If not provided,
|
|
1285
|
+
* the user simulator agent is called to generate an appropriate response based on
|
|
1286
|
+
* the current conversation context.
|
|
1287
|
+
*
|
|
1288
|
+
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1289
|
+
*
|
|
1290
|
+
* @param content - Optional content for the user's message. Can be a string or CoreMessage.
|
|
1291
|
+
* If not provided, the user simulator agent will generate the content.
|
|
1292
|
+
*
|
|
1293
|
+
* @example
|
|
1294
|
+
* ```typescript
|
|
1295
|
+
* // Use provided content
|
|
1296
|
+
* await execution.user("What's the weather like?");
|
|
1297
|
+
*
|
|
1298
|
+
* // Let user simulator generate content
|
|
1299
|
+
* await execution.user();
|
|
1300
|
+
*
|
|
1301
|
+
* // Use a CoreMessage object
|
|
1302
|
+
* await execution.user({
|
|
1303
|
+
* role: "user",
|
|
1304
|
+
* content: "Tell me a joke"
|
|
1305
|
+
* });
|
|
1306
|
+
* ```
|
|
1102
1307
|
*/
|
|
1103
1308
|
user(content?: string | CoreMessage): Promise<void>;
|
|
1104
1309
|
/**
|
|
1105
|
-
* Executes an agent turn.
|
|
1106
|
-
*
|
|
1107
|
-
* If
|
|
1108
|
-
*
|
|
1109
|
-
*
|
|
1310
|
+
* Executes an agent turn in the conversation.
|
|
1311
|
+
*
|
|
1312
|
+
* If content is provided, it's used directly as the agent's response. If not provided,
|
|
1313
|
+
* the agent under test is called to generate a response based on the current conversation
|
|
1314
|
+
* context and any pending messages.
|
|
1315
|
+
*
|
|
1316
|
+
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1317
|
+
*
|
|
1318
|
+
* @param content - Optional content for the agent's response. Can be a string or CoreMessage.
|
|
1319
|
+
* If not provided, the agent under test will generate the response.
|
|
1320
|
+
*
|
|
1321
|
+
* @example
|
|
1322
|
+
* ```typescript
|
|
1323
|
+
* // Let agent generate response
|
|
1324
|
+
* await execution.agent();
|
|
1325
|
+
*
|
|
1326
|
+
* // Use provided content
|
|
1327
|
+
* await execution.agent("The weather is sunny today!");
|
|
1328
|
+
*
|
|
1329
|
+
* // Use a CoreMessage object
|
|
1330
|
+
* await execution.agent({
|
|
1331
|
+
* role: "assistant",
|
|
1332
|
+
* content: "I'm here to help you with weather information."
|
|
1333
|
+
* });
|
|
1334
|
+
* ```
|
|
1110
1335
|
*/
|
|
1111
1336
|
agent(content?: string | CoreMessage): Promise<void>;
|
|
1112
1337
|
/**
|
|
1113
1338
|
* Invokes the judge agent to evaluate the current state of the conversation.
|
|
1114
|
-
*
|
|
1115
|
-
*
|
|
1116
|
-
*
|
|
1339
|
+
*
|
|
1340
|
+
* The judge agent analyzes the conversation history and determines whether the
|
|
1341
|
+
* scenario criteria have been met. This can result in either:
|
|
1342
|
+
* - A final scenario result (success/failure) if the judge makes a decision
|
|
1343
|
+
* - Null if the judge needs more information or conversation to continue
|
|
1344
|
+
*
|
|
1345
|
+
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1346
|
+
*
|
|
1347
|
+
* @param content - Optional message to pass to the judge agent for additional context
|
|
1348
|
+
* @returns A promise that resolves with:
|
|
1349
|
+
* - ScenarioResult if the judge makes a final decision, or
|
|
1350
|
+
* - Null if the conversation should continue
|
|
1351
|
+
*
|
|
1352
|
+
* @example
|
|
1353
|
+
* ```typescript
|
|
1354
|
+
* // Let judge evaluate current state
|
|
1355
|
+
* const result = await execution.judge();
|
|
1356
|
+
* if (result) {
|
|
1357
|
+
* console.log(`Judge decided: ${result.success ? 'pass' : 'fail'}`);
|
|
1358
|
+
* }
|
|
1359
|
+
*
|
|
1360
|
+
* // Provide additional context to judge
|
|
1361
|
+
* const result = await execution.judge("Please consider the user's satisfaction level");
|
|
1362
|
+
* ```
|
|
1117
1363
|
*/
|
|
1118
1364
|
judge(content?: string | CoreMessage): Promise<ScenarioResult | null>;
|
|
1119
1365
|
/**
|
|
1120
1366
|
* Lets the scenario proceed automatically for a specified number of turns.
|
|
1121
|
-
*
|
|
1122
|
-
* This is
|
|
1123
|
-
*
|
|
1124
|
-
*
|
|
1125
|
-
*
|
|
1126
|
-
*
|
|
1367
|
+
*
|
|
1368
|
+
* This method is a script step that simulates natural conversation flow by allowing
|
|
1369
|
+
* agents to interact automatically without explicit script steps. It can trigger
|
|
1370
|
+
* multiple agent interactions across multiple turns, making it useful for testing
|
|
1371
|
+
* scenarios where you want to see how agents behave in extended conversations.
|
|
1372
|
+
*
|
|
1373
|
+
* Unlike other script steps that typically trigger one agent interaction each,
|
|
1374
|
+
* this step can trigger many agent interactions depending on the number of turns
|
|
1375
|
+
* and the agents' behavior.
|
|
1376
|
+
*
|
|
1377
|
+
* The method will continue until:
|
|
1378
|
+
* - The specified number of turns is reached
|
|
1379
|
+
* - A final scenario result is determined
|
|
1380
|
+
* - The maximum turns limit is reached
|
|
1381
|
+
*
|
|
1382
|
+
* @param turns - The number of turns to proceed. If undefined, runs until a conclusion
|
|
1383
|
+
* or max turns is reached
|
|
1384
|
+
* @param onTurn - Optional callback executed at the end of each turn. Receives the
|
|
1385
|
+
* current execution state
|
|
1386
|
+
* @param onStep - Optional callback executed after each agent interaction. Receives
|
|
1387
|
+
* the current execution state
|
|
1388
|
+
* @returns A promise that resolves with:
|
|
1389
|
+
* - ScenarioResult if a conclusion is reached during the proceeding, or
|
|
1390
|
+
* - Null if the specified turns complete without conclusion
|
|
1391
|
+
*
|
|
1392
|
+
* @example
|
|
1393
|
+
* ```typescript
|
|
1394
|
+
* // Proceed for 5 turns
|
|
1395
|
+
* const result = await execution.proceed(5);
|
|
1396
|
+
*
|
|
1397
|
+
* // Proceed until conclusion with callbacks
|
|
1398
|
+
* const result = await execution.proceed(
|
|
1399
|
+
* undefined,
|
|
1400
|
+
* (state) => console.log(`Turn ${state.currentTurn} completed`),
|
|
1401
|
+
* (state) => console.log(`Agent interaction completed, ${state.messages.length} messages`)
|
|
1402
|
+
* );
|
|
1403
|
+
* ```
|
|
1127
1404
|
*/
|
|
1128
1405
|
proceed(turns?: number, onTurn?: (state: ScenarioExecutionStateLike) => void | Promise<void>, onStep?: (state: ScenarioExecutionStateLike) => void | Promise<void>): Promise<ScenarioResult | null>;
|
|
1129
1406
|
/**
|
|
1130
1407
|
* Immediately ends the scenario with a success verdict.
|
|
1131
|
-
*
|
|
1132
|
-
*
|
|
1133
|
-
*
|
|
1408
|
+
*
|
|
1409
|
+
* This method forces the scenario to end successfully, regardless of the current
|
|
1410
|
+
* conversation state. It's useful for scenarios where you want to explicitly
|
|
1411
|
+
* mark success based on specific conditions or external factors.
|
|
1412
|
+
*
|
|
1413
|
+
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1414
|
+
*
|
|
1415
|
+
* @param reasoning - Optional explanation for why the scenario is being marked as successful
|
|
1416
|
+
* @returns A promise that resolves with the final successful scenario result
|
|
1417
|
+
*
|
|
1418
|
+
* @example
|
|
1419
|
+
* ```typescript
|
|
1420
|
+
* // Mark success with default reasoning
|
|
1421
|
+
* const result = await execution.succeed();
|
|
1422
|
+
*
|
|
1423
|
+
* // Mark success with custom reasoning
|
|
1424
|
+
* const result = await execution.succeed(
|
|
1425
|
+
* "User successfully completed the onboarding flow"
|
|
1426
|
+
* );
|
|
1427
|
+
* ```
|
|
1134
1428
|
*/
|
|
1135
1429
|
succeed(reasoning?: string): Promise<ScenarioResult>;
|
|
1136
1430
|
/**
|
|
1137
1431
|
* Immediately ends the scenario with a failure verdict.
|
|
1138
|
-
*
|
|
1139
|
-
*
|
|
1140
|
-
*
|
|
1432
|
+
*
|
|
1433
|
+
* This method forces the scenario to end with failure, regardless of the current
|
|
1434
|
+
* conversation state. It's useful for scenarios where you want to explicitly
|
|
1435
|
+
* mark failure based on specific conditions or external factors.
|
|
1436
|
+
*
|
|
1437
|
+
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1438
|
+
*
|
|
1439
|
+
* @param reasoning - Optional explanation for why the scenario is being marked as failed
|
|
1440
|
+
* @returns A promise that resolves with the final failed scenario result
|
|
1441
|
+
*
|
|
1442
|
+
* @example
|
|
1443
|
+
* ```typescript
|
|
1444
|
+
* // Mark failure with default reasoning
|
|
1445
|
+
* const result = await execution.fail();
|
|
1446
|
+
*
|
|
1447
|
+
* // Mark failure with custom reasoning
|
|
1448
|
+
* const result = await execution.fail(
|
|
1449
|
+
* "Agent failed to provide accurate weather information"
|
|
1450
|
+
* );
|
|
1451
|
+
* ```
|
|
1141
1452
|
*/
|
|
1142
1453
|
fail(reasoning?: string): Promise<ScenarioResult>;
|
|
1454
|
+
/**
|
|
1455
|
+
* Adds execution time for a specific agent to the performance tracking.
|
|
1456
|
+
*
|
|
1457
|
+
* This method is used internally to track how long each agent takes to respond,
|
|
1458
|
+
* which is included in the final scenario result for performance analysis.
|
|
1459
|
+
* The accumulated time for each agent is used to calculate total agent response
|
|
1460
|
+
* times in the scenario result.
|
|
1461
|
+
*
|
|
1462
|
+
* @param agentIdx - The index of the agent in the agents array
|
|
1463
|
+
* @param time - The execution time in milliseconds to add to the agent's total
|
|
1464
|
+
*
|
|
1465
|
+
* @example
|
|
1466
|
+
* ```typescript
|
|
1467
|
+
* // This is typically called internally by the execution engine
|
|
1468
|
+
* execution.addAgentTime(0, 1500); // Agent at index 0 took 1.5 seconds
|
|
1469
|
+
* ```
|
|
1470
|
+
*/
|
|
1143
1471
|
addAgentTime(agentIdx: number, time: number): void;
|
|
1472
|
+
/**
|
|
1473
|
+
* Checks if a partial result has been set for the scenario.
|
|
1474
|
+
*
|
|
1475
|
+
* This method is used internally to determine if a scenario has already reached
|
|
1476
|
+
* a conclusion (success or failure) but hasn't been finalized yet. Partial results
|
|
1477
|
+
* are typically set by agents that make final decisions (like judge agents) and
|
|
1478
|
+
* are later finalized with the complete message history.
|
|
1479
|
+
*
|
|
1480
|
+
* @returns True if a partial result exists, false otherwise
|
|
1481
|
+
*
|
|
1482
|
+
* @example
|
|
1483
|
+
* ```typescript
|
|
1484
|
+
* // This is typically used internally by the execution engine
|
|
1485
|
+
* if (execution.hasResult()) {
|
|
1486
|
+
* console.log('Scenario has reached a conclusion');
|
|
1487
|
+
* }
|
|
1488
|
+
* ```
|
|
1489
|
+
*/
|
|
1144
1490
|
hasResult(): boolean;
|
|
1491
|
+
/**
|
|
1492
|
+
* Sets a partial result for the scenario.
|
|
1493
|
+
*
|
|
1494
|
+
* This method is used internally to store intermediate results that may be
|
|
1495
|
+
* finalized later with the complete message history. Partial results are typically
|
|
1496
|
+
* created by agents that make final decisions (like judge agents) and contain
|
|
1497
|
+
* the success/failure status, reasoning, and criteria evaluation, but not the
|
|
1498
|
+
* complete message history.
|
|
1499
|
+
*
|
|
1500
|
+
* @param result - The partial result without the messages field. Should include
|
|
1501
|
+
* success status, reasoning, and criteria evaluation.
|
|
1502
|
+
*
|
|
1503
|
+
* @example
|
|
1504
|
+
* ```typescript
|
|
1505
|
+
* // This is typically called internally by agents that make final decisions
|
|
1506
|
+
* execution.setResult({
|
|
1507
|
+
* success: true,
|
|
1508
|
+
* reasoning: "Agent provided accurate weather information",
|
|
1509
|
+
* metCriteria: ["Provides accurate weather data"],
|
|
1510
|
+
* unmetCriteria: []
|
|
1511
|
+
* });
|
|
1512
|
+
* ```
|
|
1513
|
+
*/
|
|
1145
1514
|
setResult(result: Omit<ScenarioResult, "messages">): void;
|
|
1515
|
+
/**
|
|
1516
|
+
* Internal method to handle script step calls to agents.
|
|
1517
|
+
*
|
|
1518
|
+
* This method is the core logic for executing script steps that involve agent
|
|
1519
|
+
* interactions. It handles finding the appropriate agent for the given role,
|
|
1520
|
+
* managing turn progression, and executing the agent's response.
|
|
1521
|
+
*
|
|
1522
|
+
* The method will:
|
|
1523
|
+
* - Find the next available agent for the specified role
|
|
1524
|
+
* - Progress to a new turn if no agent is available
|
|
1525
|
+
* - Execute the agent with the provided content or let it generate content
|
|
1526
|
+
* - Handle judgment requests for judge agents
|
|
1527
|
+
* - Return a final result if the agent makes a decision
|
|
1528
|
+
*
|
|
1529
|
+
* @param role - The role of the agent to call (USER, AGENT, or JUDGE)
|
|
1530
|
+
* @param content - Optional content to use instead of letting the agent generate it
|
|
1531
|
+
* @param judgmentRequest - Whether this is a judgment request (for judge agents)
|
|
1532
|
+
* @returns A promise that resolves with a ScenarioResult if the agent makes a final
|
|
1533
|
+
* decision, or null if the conversation should continue
|
|
1534
|
+
* @throws Error if no agent is found for the specified role
|
|
1535
|
+
*/
|
|
1146
1536
|
private scriptCallAgent;
|
|
1537
|
+
/**
|
|
1538
|
+
* Resets the scenario execution to its initial state.
|
|
1539
|
+
*
|
|
1540
|
+
* This method is called at the beginning of each execution to ensure a clean
|
|
1541
|
+
* state. It creates a new execution state, initializes agents, sets up the
|
|
1542
|
+
* first turn, and clears any pending messages or partial results.
|
|
1543
|
+
*
|
|
1544
|
+
* The reset process:
|
|
1545
|
+
* - Creates a new ScenarioExecutionState with the current config
|
|
1546
|
+
* - Sets up the thread ID (generates new one if not provided)
|
|
1547
|
+
* - Initializes all agents
|
|
1548
|
+
* - Starts the first turn
|
|
1549
|
+
* - Records the start time for performance tracking
|
|
1550
|
+
* - Clears any pending messages
|
|
1551
|
+
*/
|
|
1147
1552
|
private reset;
|
|
1148
1553
|
private nextAgentForRole;
|
|
1554
|
+
/**
|
|
1555
|
+
* Starts a new turn in the scenario execution.
|
|
1556
|
+
*
|
|
1557
|
+
* This method is called when transitioning to a new turn. It resets the pending
|
|
1558
|
+
* agents and roles for the turn, allowing all agents to participate again in
|
|
1559
|
+
* the new turn. The turn counter is incremented to track the current turn number.
|
|
1560
|
+
*
|
|
1561
|
+
* A turn represents a cycle where agents can take actions. Each turn can involve
|
|
1562
|
+
* multiple agent interactions as agents respond to each other's messages.
|
|
1563
|
+
*/
|
|
1149
1564
|
private newTurn;
|
|
1150
1565
|
private removePendingRole;
|
|
1151
1566
|
private removePendingAgent;
|
|
1152
1567
|
private getNextAgentForRole;
|
|
1153
1568
|
private setAgents;
|
|
1154
1569
|
private consumeUntilRole;
|
|
1570
|
+
/**
|
|
1571
|
+
* Creates a failure result when the maximum number of turns is reached.
|
|
1572
|
+
*
|
|
1573
|
+
* This method is called when the scenario execution reaches the maximum number
|
|
1574
|
+
* of turns without reaching a conclusion. It creates a failure result with
|
|
1575
|
+
* appropriate reasoning and includes performance metrics.
|
|
1576
|
+
*
|
|
1577
|
+
* The result includes:
|
|
1578
|
+
* - All messages from the conversation
|
|
1579
|
+
* - Failure reasoning explaining the turn limit was reached
|
|
1580
|
+
* - Empty met criteria (since no conclusion was reached)
|
|
1581
|
+
* - All judge criteria as unmet (since no evaluation was completed)
|
|
1582
|
+
* - Total execution time and agent response times
|
|
1583
|
+
*
|
|
1584
|
+
* @param errorMessage - Optional custom error message to use instead of the default
|
|
1585
|
+
* @returns A ScenarioResult indicating failure due to reaching max turns
|
|
1586
|
+
*/
|
|
1155
1587
|
private reachedMaxTurns;
|
|
1156
1588
|
private getJudgeAgent;
|
|
1157
1589
|
/**
|
|
@@ -1177,12 +1609,61 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1177
1609
|
/**
|
|
1178
1610
|
* Distributes a message to all other agents in the scenario.
|
|
1179
1611
|
*
|
|
1180
|
-
*
|
|
1181
|
-
*
|
|
1612
|
+
* This method implements the message broadcasting system that allows agents to
|
|
1613
|
+
* "hear" messages from other agents. When an agent sends a message, it needs to
|
|
1614
|
+
* be distributed to all other agents so they can respond appropriately.
|
|
1615
|
+
*
|
|
1616
|
+
* The broadcasting process:
|
|
1617
|
+
* 1. Iterates through all agents in the scenario
|
|
1618
|
+
* 2. Skips the agent that sent the message (to avoid echo)
|
|
1619
|
+
* 3. Adds the message to each agent's pending message queue
|
|
1620
|
+
* 4. Agents will receive these messages when they're called next
|
|
1621
|
+
*
|
|
1622
|
+
* This creates a realistic conversation environment where agents can see
|
|
1623
|
+
* the full conversation history and respond contextually.
|
|
1624
|
+
*
|
|
1625
|
+
* @param message - The message to broadcast to all other agents
|
|
1626
|
+
* @param fromAgentIdx - The index of the agent that sent the message (to avoid echoing back to sender)
|
|
1627
|
+
*
|
|
1628
|
+
* @example
|
|
1629
|
+
* ```typescript
|
|
1630
|
+
* // When agent 0 sends a message, it gets broadcast to agents 1 and 2
|
|
1631
|
+
* execution.broadcastMessage(
|
|
1632
|
+
* { role: "user", content: "Hello" },
|
|
1633
|
+
* 0 // fromAgentIdx
|
|
1634
|
+
* );
|
|
1635
|
+
* // Now agents 1 and 2 have this message in their pendingMessages queue
|
|
1636
|
+
* ```
|
|
1182
1637
|
*/
|
|
1183
1638
|
private broadcastMessage;
|
|
1639
|
+
/**
|
|
1640
|
+
* Executes a single script step with proper error handling and logging.
|
|
1641
|
+
*
|
|
1642
|
+
* This method is responsible for executing each script step function with
|
|
1643
|
+
* comprehensive error handling and logging. It provides the execution context
|
|
1644
|
+
* to the script step and handles any errors that occur during execution.
|
|
1645
|
+
*
|
|
1646
|
+
* The method:
|
|
1647
|
+
* - Logs the start of script step execution
|
|
1648
|
+
* - Calls the script step function with the current state and execution context
|
|
1649
|
+
* - Logs the completion of the script step
|
|
1650
|
+
* - Handles and logs any errors that occur
|
|
1651
|
+
* - Re-throws errors to maintain the original error context
|
|
1652
|
+
*
|
|
1653
|
+
* @param scriptStep - The script step function to execute (user, agent, judge, etc.)
|
|
1654
|
+
* @param stepIndex - The index of the script step for logging and debugging context
|
|
1655
|
+
* @returns The result of the script step execution (void, ScenarioResult, or null)
|
|
1656
|
+
* @throws Error if the script step throws an error (preserves original error)
|
|
1657
|
+
*/
|
|
1658
|
+
private executeScriptStep;
|
|
1184
1659
|
}
|
|
1185
1660
|
|
|
1661
|
+
declare enum StateChangeEventType {
|
|
1662
|
+
MESSAGE_ADDED = "MESSAGE_ADDED"
|
|
1663
|
+
}
|
|
1664
|
+
type StateChangeEvent = {
|
|
1665
|
+
type: StateChangeEventType.MESSAGE_ADDED;
|
|
1666
|
+
};
|
|
1186
1667
|
/**
|
|
1187
1668
|
* Manages the state of a scenario execution.
|
|
1188
1669
|
* This class implements the ScenarioExecutionStateLike interface and provides
|
|
@@ -1193,6 +1674,9 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1193
1674
|
private _messages;
|
|
1194
1675
|
private _currentTurn;
|
|
1195
1676
|
private _threadId;
|
|
1677
|
+
/** Event stream for message additions */
|
|
1678
|
+
private eventSubject;
|
|
1679
|
+
readonly events$: Observable<StateChangeEvent>;
|
|
1196
1680
|
description: string;
|
|
1197
1681
|
config: ScenarioConfig;
|
|
1198
1682
|
constructor(config: ScenarioConfig);
|
|
@@ -1218,8 +1702,11 @@ type execution_ScenarioExecution = ScenarioExecution;
|
|
|
1218
1702
|
declare const execution_ScenarioExecution: typeof ScenarioExecution;
|
|
1219
1703
|
type execution_ScenarioExecutionState = ScenarioExecutionState;
|
|
1220
1704
|
declare const execution_ScenarioExecutionState: typeof ScenarioExecutionState;
|
|
1705
|
+
type execution_StateChangeEvent = StateChangeEvent;
|
|
1706
|
+
type execution_StateChangeEventType = StateChangeEventType;
|
|
1707
|
+
declare const execution_StateChangeEventType: typeof StateChangeEventType;
|
|
1221
1708
|
declare namespace execution {
|
|
1222
|
-
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState };
|
|
1709
|
+
export { execution_ScenarioExecution as ScenarioExecution, execution_ScenarioExecutionState as ScenarioExecutionState, type execution_StateChangeEvent as StateChangeEvent, execution_StateChangeEventType as StateChangeEventType };
|
|
1223
1710
|
}
|
|
1224
1711
|
|
|
1225
1712
|
/**
|
|
@@ -1381,4 +1868,4 @@ declare namespace script {
|
|
|
1381
1868
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
1382
1869
|
declare const scenario: ScenarioApi;
|
|
1383
1870
|
|
|
1384
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|
|
1871
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|