@langwatch/scenario 0.2.0-prerelease.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1444 @@
1
+ "use strict";
2
// Bundler interop runtime: short aliases for the Object reflection helpers
// used by the CommonJS <-> ES module shims below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Registers every entry of `all` on `target` as an enumerable getter, so the
// exported value is resolved lazily at access time.
var __export = (target, all) => {
  for (var exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

// Mirrors the own properties of `from` onto `to` as live getters. Keys that
// `to` already owns, and the key named by `except`, are left untouched.
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module so it can be consumed like an ES module namespace.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  if (isNodeMode || !mod || !mod.__esModule) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

// Produces a CommonJS exports object (flagged with a non-enumerable
// `__esModule: true`) that forwards to the given export table.
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
29
+
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ AgentAdapter: () => AgentAdapter,
34
+ AgentRole: () => AgentRole,
35
+ JudgeAgentAdapter: () => JudgeAgentAdapter,
36
+ ScenarioExecution: () => ScenarioExecution,
37
+ ScenarioExecutionState: () => ScenarioExecutionState,
38
+ UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
39
+ agent: () => agent,
40
+ allAgentRoles: () => allAgentRoles,
41
+ defineConfig: () => defineConfig,
42
+ fail: () => fail,
43
+ judge: () => judge,
44
+ judgeAgent: () => judgeAgent,
45
+ message: () => message,
46
+ proceed: () => proceed,
47
+ run: () => run,
48
+ scenarioProjectConfigSchema: () => scenarioProjectConfigSchema,
49
+ succeed: () => succeed,
50
+ user: () => user,
51
+ userSimulatorAgent: () => userSimulatorAgent
52
+ });
53
+ module.exports = __toCommonJS(index_exports);
54
+
55
+ // src/script/index.ts
56
/**
 * Builds a script step that forwards a prepared message to the executor.
 * @param message2 The message passed to `executor.message`.
 * @returns A step `(state, executor)` returning whatever `executor.message` returns.
 */
var message = (message2) => (_state, executor) => executor.message(message2);
59
/**
 * Builds a script step that delegates to `executor.agent`.
 * @param content Optional content passed through unchanged.
 * @returns A step `(state, executor)` returning whatever `executor.agent` returns.
 */
var agent = (content) => (_state, executor) => executor.agent(content);
62
/**
 * Builds a script step that delegates to `executor.judge`.
 * @param content Optional content passed through unchanged.
 * @returns A step `(state, executor)` returning whatever `executor.judge` returns.
 */
var judge = (content) => (_state, executor) => executor.judge(content);
65
/**
 * Builds a script step that delegates to `executor.user`.
 * @param content Optional content passed through unchanged.
 * @returns A step `(state, executor)` returning whatever `executor.user` returns.
 */
var user = (content) => (_state, executor) => executor.user(content);
68
/**
 * Builds a script step that delegates to `executor.proceed`.
 * @param turns Forwarded as the first argument.
 * @param onTurn Forwarded as the second argument.
 * @param onStep Forwarded as the third argument.
 * @returns A step `(state, executor)` returning whatever `executor.proceed` returns.
 */
var proceed = (turns, onTurn, onStep) => (_state, executor) => executor.proceed(turns, onTurn, onStep);
71
/**
 * Builds a script step that delegates to `executor.succeed`.
 * @param reasoning Optional explanation passed through unchanged.
 * @returns A step `(state, executor)` returning whatever `executor.succeed` returns.
 */
var succeed = (reasoning) => (_state, executor) => executor.succeed(reasoning);
74
/**
 * Builds a script step that delegates to `executor.fail`.
 * @param reasoning Optional explanation passed through unchanged.
 * @returns A step `(state, executor)` returning whatever `executor.fail` returns.
 */
var fail = (reasoning) => (_state, executor) => executor.fail(reasoning);
77
+
78
+ // src/execution/scenario-execution.ts
79
+ var import_rxjs = require("rxjs");
80
+
81
+ // src/domain/core/config.ts
82
+ var import_zod = require("zod");
83
// Validation schema for a project-level scenario config file. Unknown
// top-level keys are rejected because the object schema is `.strict()`.
var scenarioProjectConfigSchema = (() => {
  const { z } = import_zod;

  // Optional default model settings; `temperature` is limited to [0, 1] and
  // falls back to 0 when omitted.
  const defaultModelSchema = z.object({
    model: z.custom(),
    temperature: z.number().min(0).max(1).optional().default(0),
    maxTokens: z.number().optional()
  }).optional();

  return z.object({
    defaultModel: defaultModelSchema,
    langwatchEndpoint: z.string().optional(),
    langwatchApiKey: z.string().optional()
  }).strict();
})();
92
/**
 * Identity helper for authoring a scenario project config.
 * @param config2 The config object.
 * @returns The very same object, unmodified.
 */
function defineConfig(config2) {
  return config2;
}
95
+
96
+ // src/domain/agents/index.ts
97
// Roles an agent can play inside a scenario (string-valued enum).
var AgentRole = /* @__PURE__ */ ((roles) => {
  Object.assign(roles, {
    USER: "User",
    AGENT: "Agent",
    JUDGE: "Judge"
  });
  return roles;
})(AgentRole || {});

// Every role, listed in the order User, Agent, Judge.
var allAgentRoles = [AgentRole.USER, AgentRole.AGENT, AgentRole.JUDGE];
104
// Base adapter for an agent taking part in a scenario in the "Agent" role.
var AgentAdapter = class {
  // Role reported by instances of this adapter.
  role = "Agent" /* AGENT */;

  // The constructor accepts an input argument but intentionally ignores it.
  constructor(input) {
    void input;
  }
};
110
// Base adapter for an agent taking part in a scenario in the "User" role.
var UserSimulatorAgentAdapter = class {
  // Role reported by instances of this adapter.
  role = "User" /* USER */;

  // The constructor accepts an input argument but intentionally ignores it.
  constructor(input) {
    void input;
  }
};
116
// Base adapter for an agent taking part in a scenario in the "Judge" role.
var JudgeAgentAdapter = class {
  // Role reported by instances of this adapter.
  role = "Judge" /* JUDGE */;

  // The constructor accepts an input argument but intentionally ignores it.
  constructor(input) {
    void input;
  }
};
122
+
123
+ // src/utils/ids.ts
124
+ var import_xksuid = require("xksuid");
125
// Module-level cache for the batch run id; starts empty (null).
var batchRunId = null;
126
/**
 * Creates a new thread identifier.
 * @returns A string of the form `thread_<generated id>`.
 */
function generateThreadId() {
  const suffix = (0, import_xksuid.generate)();
  return `thread_${suffix}`;
}
129
/**
 * Creates a new scenario run identifier.
 * @returns A string of the form `scenariorun_<generated id>`.
 */
function generateScenarioRunId() {
  const suffix = (0, import_xksuid.generate)();
  return `scenariorun_${suffix}`;
}
132
/**
 * Creates a new scenario identifier.
 * @returns A string of the form `scenario_<generated id>`.
 */
function generateScenarioId() {
  const suffix = (0, import_xksuid.generate)();
  return `scenario_${suffix}`;
}
135
/**
 * Returns the batch run id, computing and caching it on first use.
 * The `SCENARIO_BATCH_RUN_ID` environment variable wins when it is set;
 * otherwise a `scenariobatchrun_<generated id>` value is created.
 * @returns The cached batch run id.
 */
function getBatchRunId() {
  if (batchRunId) {
    return batchRunId;
  }
  batchRunId = process.env.SCENARIO_BATCH_RUN_ID ?? `scenariobatchrun_${(0, import_xksuid.generate)()}`;
  return batchRunId;
}
141
/**
 * Creates a new message identifier.
 * @returns A string of the form `scenariomsg_<generated id>`.
 */
function generateMessageId() {
  const suffix = (0, import_xksuid.generate)();
  return `scenariomsg_${suffix}`;
}
144
+
145
+ // src/execution/scenario-execution-state.ts
146
// Mutable bookkeeping for one scenario run: message history, turn counter,
// per-agent pending messages, the roles/agents still due in the current turn,
// and timing information.
var ScenarioExecutionState = class {
  _history = [];
  _turn = 0;
  _partialResult = null;
  _threadId = "";
  _agents = [];
  _pendingMessages = /* @__PURE__ */ new Map();
  _pendingRolesOnTurn = [];
  _pendingAgentsOnTurn = /* @__PURE__ */ new Set();
  _agentTimes = /* @__PURE__ */ new Map();
  _totalStartTime = 0;

  /**
   * Creates a new state and records the creation time used by `totalTime`.
   */
  constructor() {
    this._totalStartTime = Date.now();
  }

  setThreadId(threadId) {
    this._threadId = threadId;
  }

  // Replaces the agent list and drops pending messages and recorded timings.
  setAgents(agents) {
    this._agents = agents;
    this._pendingMessages.clear();
    this._agentTimes.clear();
  }

  // Appends a `{ role, content }` entry (with a fresh id) to the history only;
  // no agent's pending queue is touched.
  appendMessage(role, content) {
    this._history.push({ role, content, id: generateMessageId() });
  }

  appendUserMessage(content) {
    this.appendMessage("user", content);
  }

  appendAssistantMessage(content) {
    this.appendMessage("assistant", content);
  }

  // Records the message in the history (with a fresh id) and queues the
  // original message object for every agent except the one at `fromAgentIdx`.
  addMessage(message2, fromAgentIdx) {
    this._history.push({ ...message2, id: generateMessageId() });
    for (const agentIdx of this._agents.keys()) {
      if (agentIdx === fromAgentIdx) {
        continue;
      }
      const queue = this._pendingMessages.get(agentIdx);
      if (queue) {
        queue.push(message2);
      } else {
        this._pendingMessages.set(agentIdx, [message2]);
      }
    }
  }

  addMessages(messages, fromAgentIdx) {
    for (const entry of messages) {
      this.addMessage(entry, fromAgentIdx);
    }
  }

  getPendingMessages(agentIdx) {
    return this._pendingMessages.get(agentIdx) || [];
  }

  clearPendingMessages(agentIdx) {
    this._pendingMessages.set(agentIdx, []);
  }

  // Starts a new turn: every agent and every role becomes pending again and
  // the turn counter advances (a null counter restarts at 1).
  newTurn() {
    this._pendingAgentsOnTurn = new Set(this._agents);
    this._pendingRolesOnTurn = ["User" /* USER */, "Agent" /* AGENT */, "Judge" /* JUDGE */];
    if (this._turn === null) {
      this._turn = 1;
      return;
    }
    this._turn++;
  }

  // Removes the first occurrence of `role` from the pending roles, if present.
  removePendingRole(role) {
    const position = this._pendingRolesOnTurn.indexOf(role);
    if (position === -1) {
      return;
    }
    this._pendingRolesOnTurn.splice(position, 1);
  }

  removePendingAgent(agent2) {
    this._pendingAgentsOnTurn.delete(agent2);
  }

  // Finds the first agent with the given role that is still pending this turn.
  // Returns `{ index, agent }`, or null when there is none.
  getNextAgentForRole(role) {
    for (const [index, candidate] of this._agents.entries()) {
      if (candidate.role === role && this._pendingAgentsOnTurn.has(candidate)) {
        return { index, agent: candidate };
      }
    }
    return null;
  }

  // Adds `time` to the running total stored for the agent at `agentIdx`.
  addAgentTime(agentIdx, time) {
    const soFar = this._agentTimes.get(agentIdx) || 0;
    this._agentTimes.set(agentIdx, soFar + time);
  }

  hasResult() {
    return this._partialResult !== null;
  }

  setResult(result) {
    this._partialResult = result;
  }

  get lastMessage() {
    return this._history.at(-1);
  }

  get lastUserMessage() {
    return this._history.findLast((entry) => entry.role === "user");
  }

  get lastAssistantMessage() {
    return this._history.findLast((entry) => entry.role === "assistant");
  }

  get lastToolCall() {
    return this._history.findLast((entry) => entry.role === "tool");
  }

  // Most recent "tool" message containing a "tool-result" part for `toolName`.
  getLastToolCallByToolName(toolName) {
    const matchesTool = (part) => part.type === "tool-result" && part.toolName === toolName;
    return this._history.findLast((entry) => entry.role === "tool" && entry.content.find(matchesTool));
  }

  // True when any "tool" message contains a "tool-result" part for `toolName`.
  hasToolCall(toolName) {
    const matchesTool = (part) => part.type === "tool-result" && part.toolName === toolName;
    return this._history.some((entry) => entry.role === "tool" && entry.content.find(matchesTool));
  }

  get history() {
    return this._history;
  }

  get historyWithoutLastMessage() {
    return this._history.slice(0, -1);
  }

  // Everything before the last user message; the full history when there is
  // no user message at all.
  get historyWithoutLastUserMessage() {
    const cut = this._history.findLastIndex((entry) => entry.role === "user");
    return cut === -1 ? this._history : this._history.slice(0, cut);
  }

  get turn() {
    return this._turn;
  }

  set turn(turn) {
    this._turn = turn;
  }

  get threadId() {
    return this._threadId;
  }

  get agents() {
    return this._agents;
  }

  get pendingRolesOnTurn() {
    return this._pendingRolesOnTurn;
  }

  set pendingRolesOnTurn(roles) {
    this._pendingRolesOnTurn = roles;
  }

  // Returns a fresh array snapshot of the pending agents.
  get pendingAgentsOnTurn() {
    return [...this._pendingAgentsOnTurn];
  }

  set pendingAgentsOnTurn(agents) {
    this._pendingAgentsOnTurn = new Set(agents);
  }

  get partialResult() {
    return this._partialResult;
  }

  // Milliseconds elapsed since this state object was constructed.
  get totalTime() {
    return Date.now() - this._totalStartTime;
  }

  // Returns a copy, so callers cannot mutate the recorded timings.
  get agentTimes() {
    return new Map(this._agentTimes);
  }

  removeLastPendingRole() {
    this._pendingRolesOnTurn.pop();
  }
};
318
+
319
+ // src/events/schema.ts
320
+ var import_core = require("@ag-ui/core");
321
+ var import_zod2 = require("zod");
322
// Status values a scenario run can report (string-valued enum).
var ScenarioRunStatus = /* @__PURE__ */ ((statuses) => {
  Object.assign(statuses, {
    SUCCESS: "SUCCESS",
    ERROR: "ERROR",
    CANCELLED: "CANCELLED",
    IN_PROGRESS: "IN_PROGRESS",
    PENDING: "PENDING",
    FAILED: "FAILED"
  });
  return statuses;
})(ScenarioRunStatus || {});
331
// Fields shared by every event: the event type plus optional timestamp and
// raw payload.
var baseEventSchema = import_zod2.z.object({
  type: import_zod2.z.nativeEnum(import_core.EventType),
  timestamp: import_zod2.z.number().optional(),
  rawEvent: import_zod2.z.any().optional()
});

// Scenario events additionally carry batch, scenario, and run identifiers.
var baseScenarioEventSchema = baseEventSchema.extend({
  batchRunId: import_zod2.z.string(),
  scenarioId: import_zod2.z.string(),
  scenarioRunId: import_zod2.z.string()
});

// Run-started event: scenario name is required, description is optional.
// (A `config` record is not part of the metadata schema at this point.)
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
  type: import_zod2.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
  metadata: import_zod2.z.object({
    name: import_zod2.z.string(),
    description: import_zod2.z.string().optional()
  })
});

// Run-finished event: only the final status is validated.
// (Error details and metrics are not part of the schema at this point.)
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
  type: import_zod2.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
  status: import_zod2.z.nativeEnum(ScenarioRunStatus)
});

// Message snapshot event: the upstream snapshot schema merged with the
// scenario identifiers and a scenario-specific `type` literal.
var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
  baseScenarioEventSchema.extend({
    type: import_zod2.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
  })
);

// Union of all scenario events, discriminated on `type`.
var scenarioEventSchema = import_zod2.z.discriminatedUnion("type", [
  scenarioRunStartedSchema,
  scenarioRunFinishedSchema,
  scenarioMessageSnapshotSchema
]);

// Small response-shaped schemas.
var successSchema = import_zod2.z.object({ success: import_zod2.z.boolean() });
var errorSchema = import_zod2.z.object({ error: import_zod2.z.string() });
var stateSchema = import_zod2.z.object({
  state: import_zod2.z.object({
    messages: import_zod2.z.array(import_zod2.z.any()),
    status: import_zod2.z.string()
  })
});
var runsSchema = import_zod2.z.object({ runs: import_zod2.z.array(import_zod2.z.string()) });
var eventsSchema = import_zod2.z.object({ events: import_zod2.z.array(scenarioEventSchema) });
381
+
382
+ // src/utils/logger.ts
383
// Minimal console logger gated by the SCENARIO_LOG_LEVEL environment variable.
var Logger = class _Logger {
  /**
   * @param context Optional label (e.g. a class name) prefixed to every message.
   */
  constructor(context) {
    this.context = context;
  }

  /**
   * Creates a logger with context (e.g., class name)
   */
  static create(context) {
    return new _Logger(context);
  }

  /**
   * Checks if logging should occur based on the SCENARIO_LOG_LEVEL env var.
   * Levels are ordered error < warn < info < debug; a message is logged when
   * its level is at or below the configured one. When the variable is unset
   * or not one of those names, nothing is logged.
   * @param level The level of the message about to be logged.
   * @returns true when the message should be written.
   */
  shouldLog(level) {
    const logLevel = (process.env.SCENARIO_LOG_LEVEL || "").toLowerCase();
    const levels = ["error", "warn", "info", "debug"];
    const currentLevelIndex = levels.indexOf(logLevel);
    const requestedLevelIndex = levels.indexOf(level);
    return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
  }

  /**
   * Prefixes the message with `[context]` when a context was provided.
   */
  formatMessage(message2) {
    return this.context ? `[${this.context}] ${message2}` : message2;
  }

  /**
   * Shared implementation behind error/warn/info/debug.
   * @param level Level used for the SCENARIO_LOG_LEVEL check.
   * @param consoleMethod Name of the console method to write with.
   * @param message2 The message text.
   * @param data Optional payload; written whenever it is not `undefined`, so
   *   falsy payloads such as 0, "", false, or null are no longer dropped.
   */
  write(level, consoleMethod, message2, data) {
    if (!this.shouldLog(level)) {
      return;
    }
    const formattedMessage = this.formatMessage(message2);
    // Look the method up at call time so a replaced console method is honoured.
    if (data !== undefined) {
      console[consoleMethod](formattedMessage, data);
    } else {
      console[consoleMethod](formattedMessage);
    }
  }

  error(message2, data) {
    this.write("error", "error", message2, data);
  }

  warn(message2, data) {
    this.write("warn", "warn", message2, data);
  }

  info(message2, data) {
    this.write("info", "info", message2, data);
  }

  // Debug output goes through console.log.
  debug(message2, data) {
    this.write("debug", "log", message2, data);
  }
};
447
+
448
+ // src/execution/scenario-execution.ts
449
// Batch run id resolved once, when this module is loaded; stamped on every
// event built by makeBaseEvent.
var batchRunId2 = getBatchRunId();
450
/**
 * Normalizes whatever an agent returned into a list of messages.
 * - a string becomes a single `{ role, content }` message using `role`;
 * - an array is returned as-is;
 * - an object with a `role` property is wrapped in a one-element array;
 * - anything else (including null and undefined) yields an empty list.
 * @param response The raw agent return value.
 * @param role Role assigned to a plain-string response.
 * @returns An array of messages.
 */
function convertAgentReturnTypesToMessages(response, role) {
  if (typeof response === "string") {
    return [{ role, content: response }];
  }
  if (Array.isArray(response)) {
    return response;
  }
  // `typeof null` is "object", and `"role" in null` throws a TypeError, so
  // null has to be excluded before the `in` check.
  if (response !== null && typeof response === "object" && "role" in response) {
    return [response];
  }
  return [];
}
459
// Drives one scenario: runs the script steps, schedules agents turn by turn,
// and publishes run/message events on an observable stream.
var ScenarioExecution = class {
  state = new ScenarioExecutionState();
  eventSubject = new import_rxjs.Subject();
  logger = new Logger("scenario.execution.ScenarioExecution");
  config;
  /**
   * An observable stream of events that occur during the scenario execution.
   * Subscribe to this to monitor the progress of the scenario in real-time.
   */
  events$ = this.eventSubject.asObservable();

  /**
   * Creates a new ScenarioExecution instance.
   * Missing `id` / `threadId` values are generated; `verbose` defaults to
   * false and `maxTurns` to 10.
   * @param config2 The scenario configuration.
   * @param script The script steps to execute.
   */
  constructor(config2, script) {
    this.config = {
      id: config2.id ?? generateScenarioId(),
      name: config2.name,
      description: config2.description,
      agents: config2.agents,
      script,
      verbose: config2.verbose ?? false,
      maxTurns: config2.maxTurns ?? 10,
      threadId: config2.threadId ?? generateThreadId()
    };
    this.reset();
  }

  /**
   * The history of messages in the conversation.
   */
  get history() {
    return this.state.history;
  }

  /**
   * The unique identifier for the conversation thread.
   */
  get threadId() {
    return this.state.threadId;
  }

  /**
   * Executes the entire scenario from start to finish.
   * Each script step is awaited in order and a message snapshot is emitted
   * after every step. The first step that returns an object with a `success`
   * property ends the run. If the script finishes without such a result, a
   * failed result explaining how to conclude the script is returned.
   * A RUN_FINISHED event is emitted on every path (SUCCESS, FAILED, or ERROR).
   * @returns A promise that resolves with the final result of the scenario.
   * @throws Re-throws any error raised by a script step.
   */
  async execute() {
    this.reset();
    const scenarioRunId = generateScenarioRunId();
    this.emitRunStarted({ scenarioRunId });
    try {
      for (const scriptStep of this.config.script) {
        this.logger.debug(`[${this.config.id}] Executing script step`, {
          scriptStep
        });
        const result = await scriptStep(this.state, this);
        this.emitMessageSnapshot({ scenarioRunId });
        if (result && typeof result === "object" && "success" in result) {
          this.emitRunFinished({
            scenarioRunId,
            status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */
          });
          return result;
        }
      }
      this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
      return this.reachedMaxTurns([
        "Reached end of script without conclusion, add one of the following to the end of the script:",
        "- `Scenario.proceed()` to let the simulation continue to play out",
        "- `Scenario.judge()` to force criteria judgement",
        "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
      ].join("\n"));
    } catch (error) {
      this.emitRunFinished({
        scenarioRunId,
        status: "ERROR" /* ERROR */
      });
      throw error;
    }
  }

  /**
   * Executes a single step in the scenario.
   * A step usually corresponds to a single agent's turn. This method is useful
   * for manually controlling the scenario's progress.
   * @returns A promise that resolves with the new messages added during the step, or a final scenario result if the step concludes the scenario.
   * @throws Error when the underlying step produced no result.
   */
  async step() {
    const result = await this._step();
    if (result === null) throw new Error("No result from step");
    return result;
  }

  /**
   * Runs the next pending agent for the current turn.
   * @param goToNextTurn When false, returns null instead of starting a new turn once the current one is exhausted.
   * @param onTurn Optional callback awaited right after a new turn starts.
   * @returns The agent's messages, a final result, or null.
   */
  async _step(goToNextTurn = true, onTurn) {
    if (this.state.pendingRolesOnTurn.length === 0) {
      if (!goToNextTurn) return null;
      this.state.newTurn();
      if (onTurn) await onTurn(this.state);
      if (this.state.turn != null && this.state.turn >= this.config.maxTurns)
        return this.reachedMaxTurns();
    }
    const currentRole = this.state.pendingRolesOnTurn[0];
    const { idx, agent: nextAgent } = this.nextAgentForRole(currentRole);
    if (!nextAgent) {
      // Nobody left for this role: drop it and try the next role.
      this.state.removePendingRole(currentRole);
      return this._step(goToNextTurn, onTurn);
    }
    this.state.removePendingAgent(nextAgent);
    return await this.callAgent(idx, currentRole);
  }

  /**
   * Calls the agent at `idx`, records how long the call took, clears that
   * agent's pending messages, and stores any messages it produced.
   * @param idx Index of the agent in the state's agent list.
   * @param role The role the agent is asked to play.
   * @param judgmentRequest Forwarded to the agent as `judgmentRequest`.
   * @returns The agent's response when it has a `success` property, otherwise the list of messages added to the state.
   */
  async callAgent(idx, role, judgmentRequest = false) {
    const agent2 = this.state.agents[idx];
    const startTime = Date.now();
    const agentInput = {
      threadId: this.state.threadId,
      messages: this.state.history,
      newMessages: this.state.getPendingMessages(idx),
      requestedRole: role,
      judgmentRequest,
      scenarioState: this.state,
      scenarioConfig: this.config
    };
    const agentResponse = await agent2.call(agentInput);
    const endTime = Date.now();
    this.state.addAgentTime(idx, endTime - startTime);
    this.state.clearPendingMessages(idx);
    if (typeof agentResponse === "object" && agentResponse && "success" in agentResponse) {
      return agentResponse;
    }
    const messages = convertAgentReturnTypesToMessages(
      agentResponse,
      role === "User" /* USER */ ? "user" : "assistant"
    );
    this.state.addMessages(messages, idx);
    return messages;
  }

  /**
   * Finds the first agent with `role` that is still pending this turn,
   * provided the role itself is still pending.
   * @returns `{ idx, agent }`, or `{ idx: -1, agent: null }` when none qualifies.
   */
  nextAgentForRole(role) {
    const none = { idx: -1, agent: null };
    if (!this.state.pendingRolesOnTurn.includes(role)) return none;
    // The state getter builds a new array on every access; read it once.
    const pendingAgents = this.state.pendingAgentsOnTurn;
    const agents = this.state.agents;
    const idx = agents.findIndex(
      (candidate) => candidate.role === role && pendingAgents.includes(candidate)
    );
    return idx === -1 ? none : { idx, agent: agents[idx] };
  }

  /**
   * Builds the failed result used when the run ends without a verdict.
   * `agentTime` sums the recorded time of agents whose role is "Agent";
   * `failedCriteria` lists the judge agent's criteria when there is a judge.
   * @param errorMessage Optional reasoning overriding the default max-turns text.
   */
  reachedMaxTurns(errorMessage) {
    // The state getter returns a copy of the timings map; take it once.
    const recordedTimes = this.state.agentTimes;
    let totalAgentTime = 0;
    this.state.agents.forEach((agent2, idx) => {
      if (agent2.role === "Agent" /* AGENT */) {
        totalAgentTime += recordedTimes.get(idx) || 0;
      }
    });
    const judgeAgent = this.getJudgeAgent();
    const judgeCriteria = judgeAgent == null ? void 0 : judgeAgent.criteria;
    return {
      success: false,
      messages: this.state.history,
      reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
      passedCriteria: [],
      failedCriteria: judgeCriteria ?? [],
      totalTime: this.state.totalTime,
      agentTime: totalAgentTime
    };
  }

  /**
   * Returns the first agent that is a JudgeAgentAdapter instance, or null.
   */
  getJudgeAgent() {
    return this.state.agents.find((agent2) => agent2 instanceof JudgeAgentAdapter) ?? null;
  }

  /**
   * Discards pending roles from the FRONT of the queue until `role` is at the
   * head (or the queue is empty). The head is the element being inspected, so
   * it is the head that must be removed; removing from the tail would throw
   * away the requested role itself and always drain the whole queue.
   */
  consumeUntilRole(role) {
    const pendingRoles = this.state.pendingRolesOnTurn;
    while (pendingRoles.length > 0 && pendingRoles[0] !== role) {
      pendingRoles.shift();
    }
  }

  /**
   * Script-driven turn for `role`. With `content`, the content is recorded as
   * a message and no agent is called; without it, the next pending agent with
   * that role is called. A new turn is started when no agent with the role is
   * pending in the current one.
   * @param role The role whose turn it is.
   * @param content Optional string or message object to record instead of calling the agent.
   * @param judgmentRequest Forwarded to the agent call.
   * @returns A final result if the agent returned one, otherwise null.
   * @throws Error when no agent with `role` exists in the scenario.
   */
  async scriptCallAgent(role, content, judgmentRequest = false) {
    this.consumeUntilRole(role);
    let next = this.state.getNextAgentForRole(role);
    if (!next) {
      this.state.newTurn();
      this.consumeUntilRole(role);
      next = this.state.getNextAgentForRole(role);
    }
    if (!next) {
      let roleClass = "";
      switch (role) {
        case "User" /* USER */:
          roleClass = "a scenario.userSimulatorAgent()";
          break;
        case "Agent" /* AGENT */:
          roleClass = "a scenario.agent()";
          break;
        case "Judge" /* JUDGE */:
          roleClass = "a scenario.judgeAgent()";
          break;
        default:
          roleClass = "your agent";
      }
      if (content)
        throw new Error(
          `Cannot generate a message for role \`${role}\` with content \`${content}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
        );
      throw new Error(
        `Cannot generate a message for role \`${role}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
      );
    }
    const { index, agent: agent2 } = next;
    this.state.removePendingAgent(agent2);
    if (content) {
      if (typeof content === "string") {
        const messageRole = role === "User" /* USER */ ? "user" : "assistant";
        this.state.addMessage({ role: messageRole, content });
      } else {
        this.state.addMessage(content);
      }
      return null;
    }
    const result = await this.callAgent(index, role, judgmentRequest);
    // Plain message lists are not a verdict.
    return Array.isArray(result) ? null : result;
  }

  /**
   * Adds a message to the conversation history.
   * User and assistant messages go through the corresponding scripted turn;
   * any other role is appended to the state directly.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param message2 The message to add.
   */
  async message(message2) {
    if (message2.role === "user") {
      await this.scriptCallAgent("User" /* USER */, message2);
    } else if (message2.role === "assistant") {
      await this.scriptCallAgent("Agent" /* AGENT */, message2);
    } else {
      this.state.addMessage(message2);
    }
  }

  /**
   * Executes a user turn.
   * If content is provided, it's used as the user's message.
   * If not, the user simulator agent is called to generate a message.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param content The optional content of the user's message.
   */
  async user(content) {
    await this.scriptCallAgent("User" /* USER */, content);
  }

  /**
   * Executes an agent turn.
   * If content is provided, it's used as the agent's message.
   * If not, the agent under test is called to generate a response.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param content The optional content of the agent's message.
   */
  async agent(content) {
    await this.scriptCallAgent("Agent" /* AGENT */, content);
  }

  /**
   * Invokes the judge agent to evaluate the current state of the conversation.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param content Optional message to pass to the judge.
   * @returns A promise that resolves with the scenario result if the judge makes a final decision, otherwise null.
   */
  async judge(content) {
    return await this.scriptCallAgent("Judge" /* JUDGE */, content, true);
  }

  /**
   * Lets the scenario proceed automatically for a specified number of turns.
   * This simulates the natural flow of conversation between agents.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param turns The number of turns to proceed. If undefined, runs until a conclusion or max turns is reached.
   * @param onTurn A callback awaited when a new turn starts.
   * @param onStep A callback executed after each agent interaction.
   * @returns A promise that resolves with the scenario result if a conclusion is reached, otherwise null.
   */
  async proceed(turns, onTurn, onStep) {
    let initialTurn = this.state.turn;
    while (true) {
      const goToNextTurn = turns === void 0 || initialTurn === null || this.state.turn != null && this.state.turn + 1 < initialTurn + turns;
      const nextMessage = await this._step(goToNextTurn, onTurn);
      if (initialTurn === null)
        initialTurn = this.state.turn;
      if (nextMessage === null) {
        return null;
      }
      if (onStep) await onStep(this.state);
      if (typeof nextMessage === "object" && "success" in nextMessage)
        return nextMessage;
    }
  }

  /**
   * Immediately ends the scenario with a success verdict.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param reasoning An optional explanation for the success.
   * @returns A promise that resolves with the final successful scenario result.
   */
  async succeed(reasoning) {
    return {
      success: true,
      messages: this.state.history,
      reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
      passedCriteria: [],
      failedCriteria: []
    };
  }

  /**
   * Immediately ends the scenario with a failure verdict.
   * This is part of the `ScenarioExecutionLike` interface used by script steps.
   * @param reasoning An optional explanation for the failure.
   * @returns A promise that resolves with the final failed scenario result.
   */
  async fail(reasoning) {
    return {
      success: false,
      messages: this.state.history,
      reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
      passedCriteria: [],
      failedCriteria: []
    };
  }

  /**
   * Replaces the state with a fresh one seeded from the config. `newTurn()`
   * populates the pending roles/agents; the turn counter is then set back
   * to 0.
   */
  reset() {
    this.state = new ScenarioExecutionState();
    this.state.setThreadId(this.config.threadId || generateThreadId());
    this.state.setAgents(this.config.agents);
    this.state.newTurn();
    this.state.turn = 0;
  }

  // =====================================================
  // Event Emission Methods
  // =====================================================
  // These methods handle the creation and emission of
  // scenario events for external consumption and monitoring
  // =====================================================

  /**
   * Emits an event to the event stream for external consumption.
   */
  emitEvent(event) {
    this.eventSubject.next(event);
  }

  /**
   * Creates base event properties shared across all scenario events.
   */
  makeBaseEvent({ scenarioRunId }) {
    return {
      batchRunId: batchRunId2,
      scenarioId: this.config.id,
      scenarioRunId,
      timestamp: Date.now(),
      rawEvent: void 0
    };
  }

  /**
   * Emits a run started event to indicate scenario execution has begun.
   */
  emitRunStarted({ scenarioRunId }) {
    this.emitEvent({
      ...this.makeBaseEvent({ scenarioRunId }),
      type: "SCENARIO_RUN_STARTED" /* RUN_STARTED */,
      metadata: {
        name: this.config.name,
        description: this.config.description
      }
    });
  }

  /**
   * Emits a message snapshot event containing current conversation history.
   */
  emitMessageSnapshot({ scenarioRunId }) {
    this.emitEvent({
      ...this.makeBaseEvent({ scenarioRunId }),
      type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
      messages: this.state.history
    });
  }

  /**
   * Emits a run finished event with the final execution status.
   */
  emitRunFinished({
    scenarioRunId,
    status
  }) {
    this.emitEvent({
      ...this.makeBaseEvent({ scenarioRunId }),
      type: "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */,
      status
    });
  }
};
849
+
850
+ // src/config/load.ts
851
+ var import_promises = __toESM(require("fs/promises"));
852
+ var import_node_path = __toESM(require("path"));
853
+ var import_node_url = require("url");
854
/**
 * Loads the scenario project config from the current working directory.
 * Tries `scenario.config.js`, then `scenario.config.mjs`; the first file that
 * exists is imported and validated against `scenarioProjectConfigSchema`.
 * When neither file exists, the schema's result for an empty object is used.
 * @returns The validated config.
 * @throws Error when a config file exists but fails validation; any error
 *   other than a missing file (ENOENT) is re-thrown unchanged.
 */
async function loadScenarioProjectConfig() {
  const projectRoot = process.cwd();
  const candidates = ["scenario.config.js", "scenario.config.mjs"];
  for (const candidate of candidates) {
    const candidatePath = import_node_path.default.join(projectRoot, candidate);
    try {
      await import_promises.default.access(candidatePath);
      const loaded = await import((0, import_node_url.pathToFileURL)(candidatePath).href);
      // Prefer the default export; fall back to the module namespace itself.
      const validation = scenarioProjectConfigSchema.safeParse(loaded.default || loaded);
      if (validation.success) {
        return validation.data;
      }
      throw new Error(
        `Invalid config file ${candidate}: ${JSON.stringify(validation.error.format(), null, 2)}`
      );
    } catch (error) {
      const isMissingFile = error instanceof Error && "code" in error && error.code === "ENOENT";
      if (!isMissingFile) {
        throw error;
      }
      // Missing file: move on to the next candidate name.
    }
  }
  return await scenarioProjectConfigSchema.parseAsync({});
}
882
+
883
+ // src/events/event-bus.ts
884
+ var import_rxjs2 = require("rxjs");
885
+
886
+ // src/events/event-reporter.ts
887
/**
 * Sends scenario events to the `/api/scenario-events` path of a configured
 * endpoint via HTTP POST.
 */
var EventReporter = class {
  eventsEndpoint;
  apiKey;
  logger = new Logger("scenario.events.EventReporter");
  /**
   * @param config2 - Reporter settings: `endpoint` is the base URL the events
   *   path is resolved against, `apiKey` is the optional auth token (an empty
   *   string is stored when it is null/undefined).
   *
   * Unless SCENARIO_DISABLE_SIMULATION_REPORT_INFO is set, prints a short
   * banner describing whether reporting is configured.
   */
  constructor(config2) {
    const { endpoint, apiKey } = config2;
    this.eventsEndpoint = new URL("/api/scenario-events", endpoint);
    this.apiKey = apiKey ?? "";
    if (process.env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO) {
      return;
    }
    const banner = "=== Scenario Simulation Reporting ===";
    console.log(banner);
    if (this.apiKey) {
      console.log("Simulation reporting is enabled");
      console.log(`Endpoint: ${endpoint} -> ${this.eventsEndpoint.href}`);
      console.log(`API Key: ${this.apiKey ? "configured" : "not configured"}`);
    } else {
      console.warn("LangWatch API key not configured, simulations will be local");
      console.warn(`To enable simulation reporting in the LangWatch dashboard, configure your LangWatch API key (via LANGWATCH_API_KEY, or scenario.config.js)`);
    }
    console.log(banner);
  }
  /**
   * POSTs one event as JSON to the events endpoint.
   *
   * Request failures (network errors, non-2xx responses, unparsable response
   * bodies) are logged through the logger and not rethrown, so reporting
   * problems do not interrupt the caller.
   *
   * @param event - Event to send; its `type` is used as the log prefix.
   * @returns {Promise<void>}
   */
  async postEvent(event) {
    const tag = `[${event.type}]`;
    this.logger.debug(`${tag} Posting event`, { event });
    if (!this.eventsEndpoint) {
      this.logger.warn(
        "No LANGWATCH_ENDPOINT configured, skipping event posting"
      );
      return;
    }
    try {
      // Serialisation stays inside the try block so a failure to stringify the
      // event is logged like any other posting error.
      const requestInit = {
        method: "POST",
        body: JSON.stringify(event),
        headers: {
          "Content-Type": "application/json",
          "X-Auth-Token": this.apiKey
        }
      };
      const response = await fetch(this.eventsEndpoint.href, requestInit);
      this.logger.debug(
        `${tag} Event POST response status: ${response.status}`
      );
      if (!response.ok) {
        const errorText = await response.text();
        this.logger.error(`${tag} Event POST failed:`, {
          status: response.status,
          statusText: response.statusText,
          error: errorText,
          event
        });
        return;
      }
      const data = await response.json();
      this.logger.debug(`${tag} Event POST response:`, data);
    } catch (error) {
      this.logger.error(`${tag} Event POST error:`, {
        error,
        event,
        endpoint: this.eventsEndpoint
      });
    }
  }
};
954
+
955
+ // src/events/event-bus.ts
956
/**
 * Serialises scenario events through an RxJS Subject and forwards each one to
 * an EventReporter, one at a time and in publication order.
 */
var EventBus = class {
  events$ = new import_rxjs2.Subject();
  eventReporter;
  processingPromise = null;
  logger = new Logger("scenario.events.EventBus");
  /**
   * @param config2 - Settings passed straight to the EventReporter constructor.
   */
  constructor(config2) {
    this.eventReporter = new EventReporter(config2);
  }
  /**
   * Publishes an event into the processing pipeline.
   *
   * @param event - Event to enqueue; its `type` is used as the log prefix.
   */
  publish(event) {
    this.logger.debug(`[${event.type}] Publishing event`, {
      event
    });
    this.events$.next(event);
  }
  /**
   * Begins listening for and processing events.
   *
   * Calling this more than once returns the same promise.
   *
   * @returns {Promise<void>} Resolves once a RUN_FINISHED event has been
   *   processed, or once the event stream completes (for example after
   *   `drain()`), whichever happens first. Rejects if the stream errors.
   */
  listen() {
    this.logger.debug("Listening for events");
    if (this.processingPromise) {
      return this.processingPromise;
    }
    this.processingPromise = new Promise((resolve, reject) => {
      this.events$.pipe(
        // concatMap keeps posts strictly sequential: the next event is not
        // sent until the previous post has settled.
        (0, import_rxjs2.concatMap)(async (event) => {
          this.logger.debug(`[${event.type}] Processing event`, {
            event
          });
          await this.eventReporter.postEvent(event);
          return event;
        }),
        (0, import_rxjs2.catchError)((error) => {
          this.logger.error("Error in event stream:", error);
          return import_rxjs2.EMPTY;
        })
      ).subscribe({
        next: (event) => {
          this.logger.debug(`[${event.type}] Event processed`, {
            event
          });
          if (event.type === "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */) {
            resolve();
          }
        },
        error: (error) => {
          this.logger.error("Error in event stream:", error);
          reject(error);
        },
        // Without this handler the promise stayed pending forever whenever the
        // stream ended (drained, source completed, or error swallowed by
        // catchError) before a RUN_FINISHED event was seen.
        complete: () => {
          this.logger.debug("Event stream completed");
          resolve();
        }
      });
    });
    return this.processingPromise;
  }
  /**
   * Stops accepting new events and waits for already queued events to be
   * processed.
   *
   * The subject is completed rather than unsubscribed: `unsubscribe()` on a
   * Subject detaches observers without signalling completion, which left the
   * processing promise unresolved when no RUN_FINISHED event had been emitted.
   *
   * @returns {Promise<void>}
   */
  async drain() {
    this.logger.debug("Draining event stream");
    this.events$.complete();
    if (this.processingPromise) {
      await this.processingPromise;
    }
  }
  /**
   * Subscribes to an event stream.
   * @param source$ - The event stream to subscribe to.
   * @returns The subscription created by `source$.subscribe`.
   */
  subscribeTo(source$) {
    this.logger.debug("Subscribing to event stream");
    return source$.subscribe(this.events$);
  }
};
1031
+
1032
+ // src/runner/run.ts
1033
/**
 * Validates a scenario configuration, executes it, and reports its events.
 *
 * A thread id is generated and stored on `cfg` when none is provided. When
 * `cfg.script` is not set, the script defaults to a single `proceed()` step.
 * Events emitted by the execution are forwarded to an EventBus configured from
 * the project config, falling back to the LANGWATCH_ENDPOINT /
 * LANGWATCH_API_KEY environment variables.
 *
 * @param cfg - Scenario configuration (`name`, `description`, `agents`, and
 *   optionally `maxTurns`, `threadId`, `script`, `verbose`).
 * @returns {Promise<object>} The result returned by the scenario execution.
 * @throws {Error} When the name or description is missing, `maxTurns` is below
 *   1, no agents are given, no agent has the "Agent" role, or an agent has an
 *   unknown role.
 */
async function run(cfg) {
  if (!cfg.name) {
    throw new Error("Scenario name is required");
  }
  if (!cfg.description) {
    throw new Error("Scenario description is required");
  }
  // `??` instead of `||`: an explicit maxTurns of 0 must be rejected, not
  // silently replaced by the default before the comparison.
  if ((cfg.maxTurns ?? 10) < 1) {
    throw new Error("Max turns must be at least 1");
  }
  if (!cfg.agents || cfg.agents.length === 0) {
    throw new Error("At least one agent is required");
  }
  if (!cfg.agents.find((agent2) => agent2.role === "Agent" /* AGENT */)) {
    throw new Error("At least one non-user/non-judge agent is required");
  }
  cfg.agents.forEach((agent2, i) => {
    if (!allAgentRoles.includes(agent2.role)) {
      throw new Error(`Agent ${i} has invalid role: ${agent2.role}`);
    }
  });
  if (!cfg.threadId) {
    cfg.threadId = generateThreadId();
  }
  const steps = cfg.script || [proceed()];
  const execution = new ScenarioExecution(cfg, steps);
  let eventBus = null;
  let subscription = null;
  try {
    const projectConfig = await loadScenarioProjectConfig();
    eventBus = new EventBus({
      endpoint: projectConfig.langwatchEndpoint ?? process.env.LANGWATCH_ENDPOINT ?? "https://app.langwatch.ai",
      apiKey: projectConfig.langwatchApiKey ?? process.env.LANGWATCH_API_KEY
    });
    eventBus.listen();
    subscription = eventBus.subscribeTo(execution.events$);
    const result = await execution.execute();
    if (cfg.verbose && !result.success) {
      console.log(`Scenario failed: ${cfg.name}`);
      console.log(`Reasoning: ${result.reasoning}`);
      console.log("--------------------------------");
      console.log(`Passed criteria: ${result.passedCriteria.join("\n- ")}`);
      console.log(`Failed criteria: ${result.failedCriteria.join("\n- ")}`);
      console.log(result.messages.map(formatMessage).join("\n"));
    }
    return result;
  } finally {
    // Both are still null if loading the project config failed.
    await eventBus?.drain();
    subscription?.unsubscribe();
  }
}
1084
/**
 * Renders one conversation message as a single labelled string for console
 * output.
 *
 * User, assistant and tool content is passed through `formatParts`, so content
 * given as an array of parts is rendered part by part. Previously user content
 * was interpolated directly, which printed array content as
 * "[object Object]". Messages with any other role are printed as
 * `<role>: <content>`.
 *
 * @param m - Message with a `role` and `content`.
 * @returns {string} The formatted line(s) for the message.
 */
function formatMessage(m) {
  switch (m.role) {
    case "user":
      return `User: ${formatParts(m.content)}`;
    case "assistant":
      return `Assistant: ${formatParts(m.content)}`;
    case "tool":
      return `Tool: ${formatParts(m.content)}`;
    default:
      return `${m.role}: ${m.content}`;
  }
}
1096
/**
 * Renders message content that is either a plain string or an array of parts.
 *
 * - string: returned unchanged
 * - array with exactly one part: that part formatted by `formatPart`
 * - any other array: a leading newline followed by one formatted part per line
 * - anything else: "Unknown content: " plus its JSON representation
 *
 * @param part - Message content (string, array of parts, or other value).
 * @returns {string}
 */
function formatParts(part) {
  if (typeof part === "string") {
    return part;
  }
  if (!Array.isArray(part)) {
    return "Unknown content: " + JSON.stringify(part);
  }
  return part.length === 1 ? formatPart(part[0]) : "\n" + part.map(formatPart).join("\n");
}
1109
/**
 * Renders a single content part as text, based on its `type`.
 *
 * Handles "text", "file", "tool-call", "tool-result", "reasoning" and
 * "redacted-reasoning" parts; any other type is rendered as
 * "Unknown content: " plus the part's JSON representation. For file parts,
 * string data is printed with a `url:` prefix and non-string data is replaced
 * by "base64:omitted".
 *
 * @param part - Content part with a `type` discriminator.
 * @returns {string}
 */
function formatPart(part) {
  const kind = part.type;
  if (kind === "text") {
    return part.text;
  }
  if (kind === "file") {
    const payload = typeof part.data === "string" ? `url:${part.data}` : "base64:omitted";
    return `(file): ${part.filename} ${payload}`;
  }
  if (kind === "tool-call") {
    return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
  }
  if (kind === "tool-result") {
    return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
  }
  if (kind === "reasoning") {
    return `(reasoning): ${part.text}`;
  }
  if (kind === "redacted-reasoning") {
    return `(redacted reasoning): ${part.data}`;
  }
  return `Unknown content: ${JSON.stringify(part)}`;
}
1127
+
1128
+ // src/agents/judge-agent.ts
1129
+ var import_ai = require("ai");
1130
+ var import_zod3 = require("zod");
1131
+
1132
+ // src/agents/utils.ts
1133
// Message role literals used by the message-grouping and role-reversal helpers.
var toolMessageRole = "tool"; // marks the end of a segment when grouping messages
var assistantMessageRole = "assistant"; // swapped with the user role on reversal
var userMessageRole = "user"; // swapped with the assistant role on reversal
1136
/**
 * Splits a message list into consecutive segments, closing a segment right
 * after each message whose role is the tool role.
 *
 * Message order is preserved and no empty segment is returned; an empty input
 * yields an empty list.
 *
 * @param messages - Messages to partition.
 * @returns {Array<Array<object>>} The segments, in original order.
 */
var groupMessagesByToolBoundaries = (messages) => {
  // The last entry is always the segment currently being filled.
  const segments = [[]];
  for (const message2 of messages) {
    segments[segments.length - 1].push(message2);
    if (message2.role === toolMessageRole) {
      segments.push([]);
    }
  }
  // Drop the open segment if nothing was added to it.
  if (segments[segments.length - 1].length === 0) {
    segments.pop();
  }
  return segments;
};
1151
/**
 * Reports whether a segment involves tool usage.
 *
 * A segment qualifies when it contains a message with the tool role, or an
 * assistant message whose array content includes a part of type "tool-call".
 *
 * @param segment - Messages belonging to one segment.
 * @returns {boolean}
 */
var segmentHasToolMessages = (segment) => {
  for (const message2 of segment) {
    if (message2.role === toolMessageRole) {
      return true;
    }
    const isAssistantWithParts = message2.role === assistantMessageRole && Array.isArray(message2.content);
    if (isAssistantWithParts && message2.content.some((part) => part.type === "tool-call")) {
      return true;
    }
  }
  return false;
};
1160
/**
 * Swaps the user and assistant roles for the string-content messages of a
 * segment.
 *
 * Messages whose content is not a string, or whose role has no counterpart in
 * the swap table, are returned as the same object. Swapped messages are new
 * objects containing only `role` and `content`.
 *
 * @param segment - Messages belonging to one segment.
 * @returns {Array<object>} Messages in the same order with roles swapped where applicable.
 */
var reverseSegmentRoles = (segment) => {
  return segment.map((message2) => {
    if (typeof message2.content !== "string") {
      return message2;
    }
    const swappedRoleFor = {
      [userMessageRole]: assistantMessageRole,
      [assistantMessageRole]: userMessageRole
    };
    const swappedRole = swappedRoleFor[message2.role];
    return swappedRole ? { role: swappedRole, content: message2.content } : message2;
  });
};
1176
/**
 * Swaps user/assistant roles across a conversation while leaving tool-related
 * segments untouched.
 *
 * The messages are split at tool boundaries; segments that involve tool usage
 * are kept as they are, all other segments have their roles reversed. The
 * segments are then concatenated back into one flat list.
 *
 * @param messages - Conversation messages.
 * @returns {Array<object>} The flattened, role-reversed conversation.
 */
var messageRoleReversal = (messages) => {
  return groupMessagesByToolBoundaries(messages).flatMap((segment) => {
    if (segmentHasToolMessages(segment)) {
      return segment;
    }
    return reverseSegmentRoles(segment);
  });
};
1183
/**
 * Derives a parameter-style name from a criterion sentence.
 *
 * Double quotes are removed, every remaining character that is not an ASCII
 * letter or digit becomes an underscore, the result is lower-cased and cut to
 * at most 70 characters.
 *
 * @param {string} criterion - Criterion text.
 * @returns {string} The derived name.
 */
var criterionToParamName = (criterion) => {
  const withoutQuotes = criterion.replace(/"/g, "");
  // Spaces are covered by this replacement as well, so no separate pass is needed.
  const underscored = withoutQuotes.replace(/[^a-zA-Z0-9]/g, "_");
  return underscored.toLowerCase().substring(0, 70);
};
1186
+
1187
+ // src/config/index.ts
1188
// Module-level state for the lazily loaded project configuration.
var logger = new Logger("scenario.config");
// Becomes true once a load attempt has finished, whether it succeeded or failed.
var configLoaded = false;
// Loaded project configuration; stays null until a load succeeds.
var config = null;
// Promise of the in-flight (or finished) load, shared by concurrent callers.
var configLoadPromise = null;
1192
/**
 * Loads the scenario project configuration into module state, at most once.
 *
 * Returns immediately when a load attempt has already finished; callers that
 * arrive while a load is in progress share the same promise. A failing load is
 * logged and leaves `config` unchanged; the attempt is still marked as done so
 * it is not retried.
 *
 * @returns {Promise<void>}
 */
async function loadProjectConfig() {
  if (configLoaded) {
    return;
  }
  if (!configLoadPromise) {
    const loadOnce = async () => {
      try {
        config = await loadScenarioProjectConfig();
        logger.info("loaded scenario project config", { config });
      } catch (error) {
        logger.error("error loading scenario project config", { error });
      } finally {
        configLoaded = true;
      }
    };
    configLoadPromise = loadOnce();
  }
  return configLoadPromise;
}
1211
/**
 * Returns the project configuration, triggering the one-time load first.
 *
 * @returns {Promise<object|null>} The loaded configuration, or null when no
 *   configuration has been loaded successfully.
 */
async function getProjectConfig() {
  const pendingLoad = loadProjectConfig();
  await pendingLoad;
  return config;
}
1215
+
1216
+ // src/utils/config.ts
1217
/**
 * Layers an agent's own settings over the project's default model settings.
 *
 * Without a project config the agent settings object is returned as-is (same
 * reference). Otherwise a new object is built in which keys from `config2`
 * override keys from `projectConfig.defaultModel`.
 *
 * @param config2 - Agent-level settings.
 * @param projectConfig - Project configuration, or a falsy value when absent.
 * @returns {object} The merged settings.
 */
function mergeConfig(config2, projectConfig) {
  return projectConfig ? { ...projectConfig.defaultModel, ...config2 } : config2;
}
1226
/**
 * Merges agent settings with the project defaults and ensures a model is set.
 *
 * When the merged settings have no model (null or undefined), the project's
 * default model is used as a fallback.
 *
 * @param config2 - Agent-level settings.
 * @param projectConfig - Project configuration, possibly null/undefined.
 * @returns {object} The merged settings, guaranteed to have a truthy `model`.
 * @throws {Error} "Model is required" when no model can be determined.
 */
function mergeAndValidateConfig(config2, projectConfig) {
  const mergedConfig = mergeConfig(config2, projectConfig);
  if (mergedConfig.model == null) {
    mergedConfig.model = projectConfig?.defaultModel?.model;
  }
  if (!mergedConfig.model) {
    throw new Error("Model is required");
  }
  return mergedConfig;
}
1235
+
1236
+ // src/agents/judge-agent.ts
1237
/**
 * Builds the system prompt given to the judge model.
 *
 * The prompt consists of role, goal, scenario, criteria and rules sections.
 * Criteria are rendered as a numbered list starting at 1; when no criteria are
 * given (null/undefined or an empty list) the criteria section reads
 * "No criteria provided".
 *
 * @param criteria - List of criterion strings, possibly null/undefined.
 * @param description - Scenario description placed in the scenario section.
 * @returns {string} The assembled prompt without surrounding whitespace.
 */
function buildSystemPrompt(criteria, description) {
  const numbered = criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n");
  const criteriaList = numbered || "No criteria provided";
  const lines = [
    "<role>",
    "You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.",
    "</role>",
    "",
    "<goal>",
    "Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.",
    "If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.",
    "</goal>",
    "",
    "<scenario>",
    `${description}`,
    "</scenario>",
    "",
    "<criteria>",
    `${criteriaList}`,
    "</criteria>",
    "",
    "<rules>",
    `- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.`,
    "- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary",
    "</rules>"
  ];
  return lines.join("\n").trim();
}
1263
/**
 * Creates the `continue_test` tool definition: a tool without parameters that
 * the judge model calls to let the conversation proceed.
 *
 * @returns The tool definition produced by the `ai` package's `tool` helper.
 */
function buildContinueTestTool() {
  const definition = {
    description: "Continue the test with the next step",
    parameters: import_zod3.z.object({})
  };
  return (0, import_ai.tool)(definition);
}
1269
/**
 * Creates the `finish_test` tool definition the judge model calls to deliver
 * its final verdict.
 *
 * The tool parameters are:
 * - `criteria`: a strict object with one "true" | "false" | "inconclusive"
 *   field per criterion, keyed by `criterionToParamName(criterion)` and
 *   described by the original criterion text
 * - `reasoning`: free-text explanation
 * - `verdict`: "success" | "failure" | "inconclusive"
 *
 * @param {string[]} criteria - Criterion texts to be judged.
 * @returns The tool definition produced by the `ai` package's `tool` helper.
 */
function buildFinishTestTool(criteria) {
  const criteriaNames = criteria.map(criterionToParamName);
  const verdictFields = criteriaNames.map((name, idx) => {
    const field = import_zod3.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx]);
    return [name, field];
  });
  const criteriaSchema = import_zod3.z.object(Object.fromEntries(verdictFields)).strict().describe("Strict verdict for each criterion");
  const parameters = import_zod3.z.object({
    criteria: criteriaSchema,
    reasoning: import_zod3.z.string().describe("Explanation of what the final verdict should be"),
    verdict: import_zod3.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
  });
  return (0, import_ai.tool)({
    description: "Complete the test with a final verdict",
    parameters
  });
}
1287
/**
 * Creates a judge agent that asks an LLM whether the conversation so far
 * satisfies the configured criteria.
 *
 * On each call the model must invoke one of two tools:
 * - `continue_test`: the conversation goes on (an empty list is returned)
 * - `finish_test`: a verdict object is returned with `success`, `messages`,
 *   `reasoning`, `passedCriteria` and `failedCriteria`
 *
 * `finish_test` is forced when the current turn equals the configured maximum
 * or when a judgment was explicitly requested, provided criteria exist.
 *
 * @param cfg - Judge settings: `criteria` (list of criterion texts), optional
 *   `systemPrompt`, plus model settings merged with the project defaults.
 * @returns An agent object with `role`, `criteria` and an async `call(input)`.
 */
var judgeAgent = (cfg) => {
  return {
    role: "Judge" /* JUDGE */,
    criteria: cfg.criteria,
    call: async (input) => {
      // A missing criteria list is treated as empty so the "no criteria"
      // result below is reachable instead of crashing while building tools.
      const criteriaList = cfg.criteria ?? [];
      const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
      const messages = [
        { role: "system", content: systemPrompt },
        ...input.messages
      ];
      const isLastMessage = input.scenarioState.turn == input.scenarioConfig.maxTurns;
      const projectConfig = await getProjectConfig();
      const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
      if (!mergedConfig.model) {
        throw new Error("Model is required for the judge agent");
      }
      const tools = {
        continue_test: buildContinueTestTool(),
        finish_test: buildFinishTestTool(criteriaList)
      };
      const enforceJudgement = input.judgmentRequest;
      const hasCriteria = criteriaList.length > 0;
      if (enforceJudgement && !hasCriteria) {
        return {
          success: false,
          messages: [],
          reasoning: "JudgeAgent: No criteria was provided to be judged against",
          passedCriteria: [],
          failedCriteria: []
        };
      }
      const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
      const completion = await (0, import_ai.generateText)({
        model: mergedConfig.model,
        messages,
        temperature: mergedConfig.temperature ?? 0,
        maxTokens: mergedConfig.maxTokens,
        tools,
        toolChoice
      });
      const toolCall = completion.toolCalls?.length ? completion.toolCalls[0] : void 0;
      if (!toolCall) {
        return {
          success: false,
          messages: input.messages,
          reasoning: `JudgeAgent: No tool call found in LLM output`,
          passedCriteria: [],
          failedCriteria: criteriaList
        };
      }
      switch (toolCall.toolName) {
        case "finish_test": {
          const args = toolCall.args;
          const verdict = args.verdict || "inconclusive";
          const reasoning = args.reasoning || "No reasoning provided";
          const verdictByParam = args.criteria || {};
          // Look verdicts up by parameter name, not by position: property
          // order of the returned object is not tied to the criteria order
          // (colliding or integer-like names, reordered keys).
          const isPassed = (criterion) => verdictByParam[criterionToParamName(criterion)] === "true";
          // Anything that is not an explicit "true" counts as failed.
          const passedCriteria = criteriaList.filter((criterion) => isPassed(criterion));
          const failedCriteria = criteriaList.filter((criterion) => !isPassed(criterion));
          return {
            success: verdict === "success",
            messages: input.messages,
            reasoning,
            passedCriteria,
            failedCriteria
          };
        }
        case "continue_test":
          return [];
        default:
          return {
            success: false,
            messages: input.messages,
            reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
            passedCriteria: [],
            failedCriteria: criteriaList
          };
      }
    }
  };
};
1370
+
1371
+ // src/agents/user-simulator-agent.ts
1372
+ var import_ai2 = require("ai");
1373
/**
 * Builds the system prompt for the simulated user.
 *
 * The prompt has role, goal, scenario and rules sections and instructs the
 * model to behave like a human user talking to the agent under test.
 *
 * @param description - Scenario description placed in the scenario section.
 * @returns {string} The assembled prompt without surrounding whitespace.
 */
function buildSystemPrompt2(description) {
  const lines = [
    "<role>",
    "You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.",
    "Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.",
    "</role>",
    "",
    "<goal>",
    "Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.",
    "</goal>",
    "",
    "<scenario>",
    `${description}`,
    "</scenario>",
    "",
    "<rules>",
    "- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user",
    "</rules>"
  ];
  return lines.join("\n").trim();
}
1393
/**
 * Creates an agent that plays the user side of the conversation with an LLM.
 *
 * For each call the conversation is prefixed with the simulator system prompt
 * and a canned assistant greeting, user/assistant roles are reversed so the
 * model speaks as the user, and the generated text is returned as a user
 * message.
 *
 * @param config2 - Optional model settings, merged with the project defaults.
 * @returns An agent object with `role` and an async `call(input)` that
 *   resolves to `{ role: "user", content }`.
 */
var userSimulatorAgent = (config2) => ({
  role: "User" /* USER */,
  call: async (input) => {
    const conversation = [
      { role: "system", content: buildSystemPrompt2(input.scenarioConfig.description) },
      { role: "assistant", content: "Hello, how can I help you today" },
      ...input.messages
    ];
    const projectConfig = await getProjectConfig();
    const settings = mergeAndValidateConfig(config2 ?? {}, projectConfig);
    if (!settings.model) {
      throw new Error("Model is required for the user simulator agent");
    }
    const { text } = await (0, import_ai2.generateText)({
      model: settings.model,
      messages: messageRoleReversal(conversation),
      temperature: settings.temperature ?? 0,
      maxTokens: settings.maxTokens
    });
    if (!text) {
      throw new Error("No response content from LLM");
    }
    return { role: "user", content: text };
  }
});
1423
+ // Annotate the CommonJS export names for ESM import in node:
1424
+ 0 && (module.exports = {
1425
+ AgentAdapter,
1426
+ AgentRole,
1427
+ JudgeAgentAdapter,
1428
+ ScenarioExecution,
1429
+ ScenarioExecutionState,
1430
+ UserSimulatorAgentAdapter,
1431
+ agent,
1432
+ allAgentRoles,
1433
+ defineConfig,
1434
+ fail,
1435
+ judge,
1436
+ judgeAgent,
1437
+ message,
1438
+ proceed,
1439
+ run,
1440
+ scenarioProjectConfigSchema,
1441
+ succeed,
1442
+ user,
1443
+ userSimulatorAgent
1444
+ });