@langwatch/scenario 0.2.2 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -9
- package/dist/{chunk-NUZZAQV2.mjs → chunk-7H6OGEQ5.mjs} +85 -163
- package/dist/chunk-K7KLHTDI.mjs +146 -0
- package/dist/chunk-YPJZSK4J.mjs +121 -0
- package/dist/index.d.mts +86 -72
- package/dist/index.d.ts +86 -72
- package/dist/index.js +131 -82
- package/dist/index.mjs +40 -24
- package/dist/integrations/vitest/config.d.mts +5 -0
- package/dist/integrations/vitest/config.d.ts +5 -0
- package/dist/integrations/vitest/config.js +324 -0
- package/dist/integrations/vitest/config.mjs +35 -0
- package/dist/integrations/vitest/reporter.js +124 -1
- package/dist/integrations/vitest/reporter.mjs +4 -135
- package/dist/integrations/vitest/setup-global.d.mts +3 -0
- package/dist/integrations/vitest/setup-global.d.ts +3 -0
- package/dist/integrations/vitest/setup-global.js +30 -0
- package/dist/integrations/vitest/setup-global.mjs +11 -0
- package/dist/integrations/vitest/setup.js +97 -67
- package/dist/integrations/vitest/setup.mjs +7 -3
- package/package.json +13 -4
package/dist/index.js
CHANGED
|
@@ -32,7 +32,9 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
AgentAdapter: () => AgentAdapter,
|
|
34
34
|
AgentRole: () => AgentRole,
|
|
35
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
35
36
|
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
37
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
36
38
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
37
39
|
ScenarioExecution: () => ScenarioExecution,
|
|
38
40
|
ScenarioExecutionState: () => ScenarioExecutionState,
|
|
@@ -71,7 +73,9 @@ var domain_exports = {};
|
|
|
71
73
|
__export(domain_exports, {
|
|
72
74
|
AgentAdapter: () => AgentAdapter,
|
|
73
75
|
AgentRole: () => AgentRole,
|
|
76
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
74
77
|
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
78
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
75
79
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
76
80
|
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
77
81
|
allAgentRoles: () => allAgentRoles,
|
|
@@ -120,6 +124,10 @@ var JudgeAgentAdapter = class {
|
|
|
120
124
|
}
|
|
121
125
|
};
|
|
122
126
|
|
|
127
|
+
// src/domain/scenarios/index.ts
|
|
128
|
+
var DEFAULT_MAX_TURNS = 10;
|
|
129
|
+
var DEFAULT_VERBOSE = false;
|
|
130
|
+
|
|
123
131
|
// src/agents/utils.ts
|
|
124
132
|
var toolMessageRole = "tool";
|
|
125
133
|
var assistantMessageRole = "assistant";
|
|
@@ -175,6 +183,53 @@ var criterionToParamName = (criterion) => {
|
|
|
175
183
|
return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
|
|
176
184
|
};
|
|
177
185
|
|
|
186
|
+
// src/config/env.ts
|
|
187
|
+
var import_zod2 = require("zod");
|
|
188
|
+
|
|
189
|
+
// src/config/log-levels.ts
|
|
190
|
+
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
191
|
+
LogLevel2["ERROR"] = "ERROR";
|
|
192
|
+
LogLevel2["WARN"] = "WARN";
|
|
193
|
+
LogLevel2["INFO"] = "INFO";
|
|
194
|
+
LogLevel2["DEBUG"] = "DEBUG";
|
|
195
|
+
return LogLevel2;
|
|
196
|
+
})(LogLevel || {});
|
|
197
|
+
|
|
198
|
+
// src/config/env.ts
|
|
199
|
+
var envSchema = import_zod2.z.object({
|
|
200
|
+
/**
|
|
201
|
+
* LangWatch API key for event reporting.
|
|
202
|
+
* If not provided, events will not be sent to LangWatch.
|
|
203
|
+
*/
|
|
204
|
+
LANGWATCH_API_KEY: import_zod2.z.string().optional(),
|
|
205
|
+
/**
|
|
206
|
+
* LangWatch endpoint URL for event reporting.
|
|
207
|
+
* Defaults to the production LangWatch endpoint.
|
|
208
|
+
*/
|
|
209
|
+
LANGWATCH_ENDPOINT: import_zod2.z.string().url().default("https://app.langwatch.ai"),
|
|
210
|
+
/**
|
|
211
|
+
* Disables simulation report info messages when set to any truthy value.
|
|
212
|
+
* Useful for CI/CD environments or when you want cleaner output.
|
|
213
|
+
*/
|
|
214
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod2.z.string().optional().transform((val) => Boolean(val)),
|
|
215
|
+
/**
|
|
216
|
+
* Node environment - affects logging and behavior.
|
|
217
|
+
* Defaults to 'development' if not specified.
|
|
218
|
+
*/
|
|
219
|
+
NODE_ENV: import_zod2.z.enum(["development", "production", "test"]).default("development"),
|
|
220
|
+
/**
|
|
221
|
+
* Log level for the scenario package.
|
|
222
|
+
* Defaults to 'info' if not specified.
|
|
223
|
+
*/
|
|
224
|
+
LOG_LEVEL: import_zod2.z.nativeEnum(LogLevel).optional(),
|
|
225
|
+
/**
|
|
226
|
+
* Scenario batch run ID.
|
|
227
|
+
* If not provided, a random ID will be generated.
|
|
228
|
+
*/
|
|
229
|
+
SCENARIO_BATCH_RUN_ID: import_zod2.z.string().optional()
|
|
230
|
+
});
|
|
231
|
+
var env = envSchema.parse(process.env);
|
|
232
|
+
|
|
178
233
|
// src/config/load.ts
|
|
179
234
|
var import_promises = __toESM(require("fs/promises"));
|
|
180
235
|
var import_node_path = __toESM(require("path"));
|
|
@@ -220,7 +275,7 @@ var Logger = class _Logger {
|
|
|
220
275
|
return new _Logger(context);
|
|
221
276
|
}
|
|
222
277
|
getLogLevel() {
|
|
223
|
-
return env.
|
|
278
|
+
return env.LOG_LEVEL ?? "INFO" /* INFO */;
|
|
224
279
|
}
|
|
225
280
|
getLogLevelIndex(level) {
|
|
226
281
|
return Object.values(LogLevel).indexOf(level);
|
|
@@ -278,54 +333,7 @@ var Logger = class _Logger {
|
|
|
278
333
|
}
|
|
279
334
|
};
|
|
280
335
|
|
|
281
|
-
// src/config/
|
|
282
|
-
var import_zod2 = require("zod");
|
|
283
|
-
|
|
284
|
-
// src/config/log-levels.ts
|
|
285
|
-
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
286
|
-
LogLevel2["ERROR"] = "ERROR";
|
|
287
|
-
LogLevel2["WARN"] = "WARN";
|
|
288
|
-
LogLevel2["INFO"] = "INFO";
|
|
289
|
-
LogLevel2["DEBUG"] = "DEBUG";
|
|
290
|
-
return LogLevel2;
|
|
291
|
-
})(LogLevel || {});
|
|
292
|
-
|
|
293
|
-
// src/config/env.ts
|
|
294
|
-
var envSchema = import_zod2.z.object({
|
|
295
|
-
/**
|
|
296
|
-
* LangWatch API key for event reporting.
|
|
297
|
-
* If not provided, events will not be sent to LangWatch.
|
|
298
|
-
*/
|
|
299
|
-
LANGWATCH_API_KEY: import_zod2.z.string().optional(),
|
|
300
|
-
/**
|
|
301
|
-
* LangWatch endpoint URL for event reporting.
|
|
302
|
-
* Defaults to the production LangWatch endpoint.
|
|
303
|
-
*/
|
|
304
|
-
LANGWATCH_ENDPOINT: import_zod2.z.string().url().default("https://app.langwatch.ai"),
|
|
305
|
-
/**
|
|
306
|
-
* Disables simulation report info messages when set to any truthy value.
|
|
307
|
-
* Useful for CI/CD environments or when you want cleaner output.
|
|
308
|
-
*/
|
|
309
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod2.z.string().optional().transform((val) => Boolean(val)),
|
|
310
|
-
/**
|
|
311
|
-
* Node environment - affects logging and behavior.
|
|
312
|
-
* Defaults to 'development' if not specified.
|
|
313
|
-
*/
|
|
314
|
-
NODE_ENV: import_zod2.z.enum(["development", "production", "test"]).default("development"),
|
|
315
|
-
/**
|
|
316
|
-
* Log level for the scenario package.
|
|
317
|
-
* Defaults to 'info' if not specified.
|
|
318
|
-
*/
|
|
319
|
-
SCENARIO_LOG_LEVEL: import_zod2.z.nativeEnum(LogLevel).optional(),
|
|
320
|
-
/**
|
|
321
|
-
* Scenario batch run ID.
|
|
322
|
-
* If not provided, a random ID will be generated.
|
|
323
|
-
*/
|
|
324
|
-
SCENARIO_BATCH_RUN_ID: import_zod2.z.string().optional()
|
|
325
|
-
});
|
|
326
|
-
var env = envSchema.parse(process.env);
|
|
327
|
-
|
|
328
|
-
// src/config/index.ts
|
|
336
|
+
// src/config/get-project-config.ts
|
|
329
337
|
var logger = new Logger("scenario.config");
|
|
330
338
|
var configLoaded = false;
|
|
331
339
|
var config = null;
|
|
@@ -340,7 +348,7 @@ async function loadProjectConfig() {
|
|
|
340
348
|
configLoadPromise = (async () => {
|
|
341
349
|
try {
|
|
342
350
|
config = await loadScenarioProjectConfig();
|
|
343
|
-
logger.
|
|
351
|
+
logger.debug("loaded scenario project config", { config });
|
|
344
352
|
} catch (error) {
|
|
345
353
|
logger.error("error loading scenario project config", { error });
|
|
346
354
|
} finally {
|
|
@@ -573,7 +581,10 @@ __export(execution_exports, {
|
|
|
573
581
|
var import_rxjs = require("rxjs");
|
|
574
582
|
|
|
575
583
|
// src/utils/ids.ts
|
|
584
|
+
var import_node_crypto = __toESM(require("crypto"));
|
|
585
|
+
var import_node_process = __toESM(require("process"));
|
|
576
586
|
var import_xksuid = require("xksuid");
|
|
587
|
+
var batchRunId;
|
|
577
588
|
function generateThreadId() {
|
|
578
589
|
return `thread_${(0, import_xksuid.generate)()}`;
|
|
579
590
|
}
|
|
@@ -584,10 +595,31 @@ function generateScenarioId() {
|
|
|
584
595
|
return `scenario_${(0, import_xksuid.generate)()}`;
|
|
585
596
|
}
|
|
586
597
|
function getBatchRunId() {
|
|
587
|
-
if (
|
|
588
|
-
|
|
589
|
-
}
|
|
590
|
-
|
|
598
|
+
if (batchRunId) {
|
|
599
|
+
return batchRunId;
|
|
600
|
+
}
|
|
601
|
+
if (import_node_process.default.env.SCENARIO_BATCH_RUN_ID) {
|
|
602
|
+
console.log("process.env.SCENARIO_BATCH_RUN_ID", import_node_process.default.env.SCENARIO_BATCH_RUN_ID);
|
|
603
|
+
return batchRunId = import_node_process.default.env.SCENARIO_BATCH_RUN_ID;
|
|
604
|
+
}
|
|
605
|
+
if (import_node_process.default.env.VITEST_WORKER_ID || import_node_process.default.env.JEST_WORKER_ID) {
|
|
606
|
+
const parentProcessId = import_node_process.default.ppid;
|
|
607
|
+
const now = /* @__PURE__ */ new Date();
|
|
608
|
+
const year = now.getUTCFullYear();
|
|
609
|
+
const week = String(getISOWeekNumber(now)).padStart(2, "0");
|
|
610
|
+
const raw = `${parentProcessId}_${year}_w${week}`;
|
|
611
|
+
const hash = import_node_crypto.default.createHash("sha256").update(raw).digest("hex").slice(0, 12);
|
|
612
|
+
return batchRunId = `scenariobatchrun_${hash}`;
|
|
613
|
+
}
|
|
614
|
+
return batchRunId = `scenariobatchrun_${(0, import_xksuid.generate)()}`;
|
|
615
|
+
}
|
|
616
|
+
function getISOWeekNumber(date) {
|
|
617
|
+
const tmp = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
|
618
|
+
const dayNum = tmp.getUTCDay() || 7;
|
|
619
|
+
tmp.setUTCDate(tmp.getUTCDate() + 4 - dayNum);
|
|
620
|
+
const yearStart = new Date(Date.UTC(tmp.getUTCFullYear(), 0, 1));
|
|
621
|
+
const weekNo = Math.ceil(((tmp.getTime() - yearStart.getTime()) / 864e5 + 1) / 7);
|
|
622
|
+
return weekNo;
|
|
591
623
|
}
|
|
592
624
|
function generateMessageId() {
|
|
593
625
|
return `scenariomsg_${(0, import_xksuid.generate)()}`;
|
|
@@ -643,6 +675,16 @@ var ScenarioExecutionState = class {
|
|
|
643
675
|
}
|
|
644
676
|
return lastMessage;
|
|
645
677
|
}
|
|
678
|
+
lastAgentMessage() {
|
|
679
|
+
if (this._messages.length === 0) {
|
|
680
|
+
throw new Error("No messages in history");
|
|
681
|
+
}
|
|
682
|
+
const lastMessage = this._messages.findLast((message2) => message2.role === "assistant");
|
|
683
|
+
if (!lastMessage) {
|
|
684
|
+
throw new Error("No agent message in history");
|
|
685
|
+
}
|
|
686
|
+
return lastMessage;
|
|
687
|
+
}
|
|
646
688
|
lastToolCall(toolName) {
|
|
647
689
|
if (this._messages.length === 0) {
|
|
648
690
|
throw new Error("No messages in history");
|
|
@@ -650,9 +692,6 @@ var ScenarioExecutionState = class {
|
|
|
650
692
|
const lastMessage = this._messages.findLast((message2) => message2.role === "tool" && message2.content.find(
|
|
651
693
|
(part) => part.type === "tool-result" && part.toolName === toolName
|
|
652
694
|
));
|
|
653
|
-
if (!lastMessage) {
|
|
654
|
-
throw new Error("No tool call message in history");
|
|
655
|
-
}
|
|
656
695
|
return lastMessage;
|
|
657
696
|
}
|
|
658
697
|
hasToolCall(toolName) {
|
|
@@ -808,7 +847,6 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
808
847
|
var message_conversion_default = convertCoreMessagesToAguiMessages;
|
|
809
848
|
|
|
810
849
|
// src/execution/scenario-execution.ts
|
|
811
|
-
var batchRunId = getBatchRunId();
|
|
812
850
|
var ScenarioExecution = class {
|
|
813
851
|
state;
|
|
814
852
|
eventSubject = new import_rxjs.Subject();
|
|
@@ -838,8 +876,8 @@ var ScenarioExecution = class {
|
|
|
838
876
|
description: config2.description,
|
|
839
877
|
agents: config2.agents,
|
|
840
878
|
script,
|
|
841
|
-
verbose: config2.verbose ??
|
|
842
|
-
maxTurns: config2.maxTurns ??
|
|
879
|
+
verbose: config2.verbose ?? DEFAULT_VERBOSE,
|
|
880
|
+
maxTurns: config2.maxTurns ?? DEFAULT_MAX_TURNS,
|
|
843
881
|
threadId: config2.threadId ?? generateThreadId(),
|
|
844
882
|
setId: config2.setId
|
|
845
883
|
};
|
|
@@ -891,12 +929,14 @@ var ScenarioExecution = class {
|
|
|
891
929
|
}
|
|
892
930
|
}
|
|
893
931
|
this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
|
|
894
|
-
return this.reachedMaxTurns(
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
932
|
+
return this.reachedMaxTurns(
|
|
933
|
+
[
|
|
934
|
+
"Reached end of script without conclusion, add one of the following to the end of the script:",
|
|
935
|
+
"- `Scenario.proceed()` to let the simulation continue to play out",
|
|
936
|
+
"- `Scenario.judge()` to force criteria judgement",
|
|
937
|
+
"- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
|
|
938
|
+
].join("\n")
|
|
939
|
+
);
|
|
900
940
|
} catch (error) {
|
|
901
941
|
const errorResult = {
|
|
902
942
|
success: false,
|
|
@@ -1031,8 +1071,7 @@ var ScenarioExecution = class {
|
|
|
1031
1071
|
while (true) {
|
|
1032
1072
|
const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
|
|
1033
1073
|
const nextMessage = await this._step(goToNextTurn, onTurn);
|
|
1034
|
-
if (initialTurn === null)
|
|
1035
|
-
initialTurn = this.state.currentTurn;
|
|
1074
|
+
if (initialTurn === null) initialTurn = this.state.currentTurn;
|
|
1036
1075
|
if (nextMessage === null) {
|
|
1037
1076
|
return null;
|
|
1038
1077
|
}
|
|
@@ -1118,7 +1157,10 @@ var ScenarioExecution = class {
|
|
|
1118
1157
|
agent2 = nextAgent.agent;
|
|
1119
1158
|
this.removePendingAgent(agent2);
|
|
1120
1159
|
if (content) {
|
|
1121
|
-
const message2 = typeof content === "string" ? {
|
|
1160
|
+
const message2 = typeof content === "string" ? {
|
|
1161
|
+
role: role === "User" /* USER */ ? "user" : "assistant",
|
|
1162
|
+
content
|
|
1163
|
+
} : content;
|
|
1122
1164
|
this.state.addMessage(message2);
|
|
1123
1165
|
this.broadcastMessage(message2, index);
|
|
1124
1166
|
return null;
|
|
@@ -1191,7 +1233,9 @@ var ScenarioExecution = class {
|
|
|
1191
1233
|
reachedMaxTurns(errorMessage) {
|
|
1192
1234
|
var _a;
|
|
1193
1235
|
const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
|
|
1194
|
-
const agentTimes = agentRoleAgentsIdx.map(
|
|
1236
|
+
const agentTimes = agentRoleAgentsIdx.map(
|
|
1237
|
+
(i) => this.agentTimes.get(i) || 0
|
|
1238
|
+
);
|
|
1195
1239
|
const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
|
|
1196
1240
|
return {
|
|
1197
1241
|
success: false,
|
|
@@ -1220,7 +1264,7 @@ var ScenarioExecution = class {
|
|
|
1220
1264
|
type: "placeholder",
|
|
1221
1265
|
// This will be replaced by the specific event type
|
|
1222
1266
|
timestamp: Date.now(),
|
|
1223
|
-
batchRunId,
|
|
1267
|
+
batchRunId: getBatchRunId(),
|
|
1224
1268
|
scenarioId: this.config.id,
|
|
1225
1269
|
scenarioRunId,
|
|
1226
1270
|
scenarioSetId: this.config.setId
|
|
@@ -1293,10 +1337,8 @@ var ScenarioExecution = class {
|
|
|
1293
1337
|
function convertAgentReturnTypesToMessages(response, role) {
|
|
1294
1338
|
if (typeof response === "string")
|
|
1295
1339
|
return [{ role, content: response }];
|
|
1296
|
-
if (Array.isArray(response))
|
|
1297
|
-
|
|
1298
|
-
if (typeof response === "object" && "role" in response)
|
|
1299
|
-
return [response];
|
|
1340
|
+
if (Array.isArray(response)) return response;
|
|
1341
|
+
if (typeof response === "object" && "role" in response) return [response];
|
|
1300
1342
|
return [];
|
|
1301
1343
|
}
|
|
1302
1344
|
|
|
@@ -1320,12 +1362,11 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
1320
1362
|
if (this.isGreetingDisabled()) {
|
|
1321
1363
|
return;
|
|
1322
1364
|
}
|
|
1323
|
-
|
|
1324
|
-
if (_EventAlertMessageLogger.shownBatchIds.has(batchRunId2)) {
|
|
1365
|
+
if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
|
|
1325
1366
|
return;
|
|
1326
1367
|
}
|
|
1327
|
-
_EventAlertMessageLogger.shownBatchIds.add(
|
|
1328
|
-
this.displayGreeting(
|
|
1368
|
+
_EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
|
|
1369
|
+
this.displayGreeting();
|
|
1329
1370
|
}
|
|
1330
1371
|
/**
|
|
1331
1372
|
* Shows a fancy message about how to watch the simulation.
|
|
@@ -1340,7 +1381,7 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
1340
1381
|
isGreetingDisabled() {
|
|
1341
1382
|
return env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
|
|
1342
1383
|
}
|
|
1343
|
-
displayGreeting(
|
|
1384
|
+
displayGreeting() {
|
|
1344
1385
|
const separator = "\u2500".repeat(60);
|
|
1345
1386
|
if (!env.LANGWATCH_API_KEY) {
|
|
1346
1387
|
console.log(`
|
|
@@ -1354,7 +1395,10 @@ ${separator}`);
|
|
|
1354
1395
|
console.log(" \u2022 Set LANGWATCH_API_KEY environment variable");
|
|
1355
1396
|
console.log(" \u2022 Or configure apiKey in scenario.config.js");
|
|
1356
1397
|
console.log("");
|
|
1357
|
-
console.log(`\u{1F4E6} Batch Run ID: ${
|
|
1398
|
+
console.log(`\u{1F4E6} Batch Run ID: ${getBatchRunId()}`);
|
|
1399
|
+
console.log("");
|
|
1400
|
+
console.log("\u{1F507} To disable these messages:");
|
|
1401
|
+
console.log(" \u2022 Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true");
|
|
1358
1402
|
console.log(`${separator}
|
|
1359
1403
|
`);
|
|
1360
1404
|
} else {
|
|
@@ -1368,7 +1412,10 @@ ${separator}`);
|
|
|
1368
1412
|
` API Key: ${env.LANGWATCH_API_KEY.length > 0 ? "Configured" : "Not configured"}`
|
|
1369
1413
|
);
|
|
1370
1414
|
console.log("");
|
|
1371
|
-
console.log(`\u{1F4E6} Batch Run ID: ${
|
|
1415
|
+
console.log(`\u{1F4E6} Batch Run ID: ${getBatchRunId()}`);
|
|
1416
|
+
console.log("");
|
|
1417
|
+
console.log("\u{1F507} To disable these messages:");
|
|
1418
|
+
console.log(" \u2022 Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true");
|
|
1372
1419
|
console.log(`${separator}
|
|
1373
1420
|
`);
|
|
1374
1421
|
}
|
|
@@ -1712,7 +1759,9 @@ var index_default = scenario;
|
|
|
1712
1759
|
0 && (module.exports = {
|
|
1713
1760
|
AgentAdapter,
|
|
1714
1761
|
AgentRole,
|
|
1762
|
+
DEFAULT_MAX_TURNS,
|
|
1715
1763
|
DEFAULT_TEMPERATURE,
|
|
1764
|
+
DEFAULT_VERBOSE,
|
|
1716
1765
|
JudgeAgentAdapter,
|
|
1717
1766
|
ScenarioExecution,
|
|
1718
1767
|
ScenarioExecutionState,
|
package/dist/index.mjs
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import {
|
|
2
2
|
AgentAdapter,
|
|
3
3
|
AgentRole,
|
|
4
|
+
DEFAULT_MAX_TURNS,
|
|
4
5
|
DEFAULT_TEMPERATURE,
|
|
6
|
+
DEFAULT_VERBOSE,
|
|
5
7
|
EventBus,
|
|
6
8
|
JudgeAgentAdapter,
|
|
7
|
-
Logger,
|
|
8
9
|
UserSimulatorAgentAdapter,
|
|
9
10
|
allAgentRoles,
|
|
10
11
|
defineConfig,
|
|
11
12
|
domain_exports,
|
|
12
|
-
env,
|
|
13
13
|
generateMessageId,
|
|
14
14
|
generateScenarioId,
|
|
15
15
|
generateScenarioRunId,
|
|
@@ -17,7 +17,11 @@ import {
|
|
|
17
17
|
getBatchRunId,
|
|
18
18
|
getProjectConfig,
|
|
19
19
|
scenarioProjectConfigSchema
|
|
20
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-7H6OGEQ5.mjs";
|
|
21
|
+
import {
|
|
22
|
+
Logger,
|
|
23
|
+
env
|
|
24
|
+
} from "./chunk-YPJZSK4J.mjs";
|
|
21
25
|
import {
|
|
22
26
|
__export
|
|
23
27
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -356,6 +360,16 @@ var ScenarioExecutionState = class {
|
|
|
356
360
|
}
|
|
357
361
|
return lastMessage;
|
|
358
362
|
}
|
|
363
|
+
lastAgentMessage() {
|
|
364
|
+
if (this._messages.length === 0) {
|
|
365
|
+
throw new Error("No messages in history");
|
|
366
|
+
}
|
|
367
|
+
const lastMessage = this._messages.findLast((message2) => message2.role === "assistant");
|
|
368
|
+
if (!lastMessage) {
|
|
369
|
+
throw new Error("No agent message in history");
|
|
370
|
+
}
|
|
371
|
+
return lastMessage;
|
|
372
|
+
}
|
|
359
373
|
lastToolCall(toolName) {
|
|
360
374
|
if (this._messages.length === 0) {
|
|
361
375
|
throw new Error("No messages in history");
|
|
@@ -363,9 +377,6 @@ var ScenarioExecutionState = class {
|
|
|
363
377
|
const lastMessage = this._messages.findLast((message2) => message2.role === "tool" && message2.content.find(
|
|
364
378
|
(part) => part.type === "tool-result" && part.toolName === toolName
|
|
365
379
|
));
|
|
366
|
-
if (!lastMessage) {
|
|
367
|
-
throw new Error("No tool call message in history");
|
|
368
|
-
}
|
|
369
380
|
return lastMessage;
|
|
370
381
|
}
|
|
371
382
|
hasToolCall(toolName) {
|
|
@@ -449,7 +460,6 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
449
460
|
var message_conversion_default = convertCoreMessagesToAguiMessages;
|
|
450
461
|
|
|
451
462
|
// src/execution/scenario-execution.ts
|
|
452
|
-
var batchRunId = getBatchRunId();
|
|
453
463
|
var ScenarioExecution = class {
|
|
454
464
|
state;
|
|
455
465
|
eventSubject = new Subject();
|
|
@@ -479,8 +489,8 @@ var ScenarioExecution = class {
|
|
|
479
489
|
description: config.description,
|
|
480
490
|
agents: config.agents,
|
|
481
491
|
script,
|
|
482
|
-
verbose: config.verbose ??
|
|
483
|
-
maxTurns: config.maxTurns ??
|
|
492
|
+
verbose: config.verbose ?? DEFAULT_VERBOSE,
|
|
493
|
+
maxTurns: config.maxTurns ?? DEFAULT_MAX_TURNS,
|
|
484
494
|
threadId: config.threadId ?? generateThreadId(),
|
|
485
495
|
setId: config.setId
|
|
486
496
|
};
|
|
@@ -532,12 +542,14 @@ var ScenarioExecution = class {
|
|
|
532
542
|
}
|
|
533
543
|
}
|
|
534
544
|
this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
|
|
535
|
-
return this.reachedMaxTurns(
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
545
|
+
return this.reachedMaxTurns(
|
|
546
|
+
[
|
|
547
|
+
"Reached end of script without conclusion, add one of the following to the end of the script:",
|
|
548
|
+
"- `Scenario.proceed()` to let the simulation continue to play out",
|
|
549
|
+
"- `Scenario.judge()` to force criteria judgement",
|
|
550
|
+
"- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
|
|
551
|
+
].join("\n")
|
|
552
|
+
);
|
|
541
553
|
} catch (error) {
|
|
542
554
|
const errorResult = {
|
|
543
555
|
success: false,
|
|
@@ -672,8 +684,7 @@ var ScenarioExecution = class {
|
|
|
672
684
|
while (true) {
|
|
673
685
|
const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
|
|
674
686
|
const nextMessage = await this._step(goToNextTurn, onTurn);
|
|
675
|
-
if (initialTurn === null)
|
|
676
|
-
initialTurn = this.state.currentTurn;
|
|
687
|
+
if (initialTurn === null) initialTurn = this.state.currentTurn;
|
|
677
688
|
if (nextMessage === null) {
|
|
678
689
|
return null;
|
|
679
690
|
}
|
|
@@ -759,7 +770,10 @@ var ScenarioExecution = class {
|
|
|
759
770
|
agent2 = nextAgent.agent;
|
|
760
771
|
this.removePendingAgent(agent2);
|
|
761
772
|
if (content) {
|
|
762
|
-
const message2 = typeof content === "string" ? {
|
|
773
|
+
const message2 = typeof content === "string" ? {
|
|
774
|
+
role: role === "User" /* USER */ ? "user" : "assistant",
|
|
775
|
+
content
|
|
776
|
+
} : content;
|
|
763
777
|
this.state.addMessage(message2);
|
|
764
778
|
this.broadcastMessage(message2, index);
|
|
765
779
|
return null;
|
|
@@ -832,7 +846,9 @@ var ScenarioExecution = class {
|
|
|
832
846
|
reachedMaxTurns(errorMessage) {
|
|
833
847
|
var _a;
|
|
834
848
|
const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
|
|
835
|
-
const agentTimes = agentRoleAgentsIdx.map(
|
|
849
|
+
const agentTimes = agentRoleAgentsIdx.map(
|
|
850
|
+
(i) => this.agentTimes.get(i) || 0
|
|
851
|
+
);
|
|
836
852
|
const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
|
|
837
853
|
return {
|
|
838
854
|
success: false,
|
|
@@ -861,7 +877,7 @@ var ScenarioExecution = class {
|
|
|
861
877
|
type: "placeholder",
|
|
862
878
|
// This will be replaced by the specific event type
|
|
863
879
|
timestamp: Date.now(),
|
|
864
|
-
batchRunId,
|
|
880
|
+
batchRunId: getBatchRunId(),
|
|
865
881
|
scenarioId: this.config.id,
|
|
866
882
|
scenarioRunId,
|
|
867
883
|
scenarioSetId: this.config.setId
|
|
@@ -934,10 +950,8 @@ var ScenarioExecution = class {
|
|
|
934
950
|
function convertAgentReturnTypesToMessages(response, role) {
|
|
935
951
|
if (typeof response === "string")
|
|
936
952
|
return [{ role, content: response }];
|
|
937
|
-
if (Array.isArray(response))
|
|
938
|
-
|
|
939
|
-
if (typeof response === "object" && "role" in response)
|
|
940
|
-
return [response];
|
|
953
|
+
if (Array.isArray(response)) return response;
|
|
954
|
+
if (typeof response === "object" && "role" in response) return [response];
|
|
941
955
|
return [];
|
|
942
956
|
}
|
|
943
957
|
|
|
@@ -1087,7 +1101,9 @@ var index_default = scenario;
|
|
|
1087
1101
|
export {
|
|
1088
1102
|
AgentAdapter,
|
|
1089
1103
|
AgentRole,
|
|
1104
|
+
DEFAULT_MAX_TURNS,
|
|
1090
1105
|
DEFAULT_TEMPERATURE,
|
|
1106
|
+
DEFAULT_VERBOSE,
|
|
1091
1107
|
JudgeAgentAdapter,
|
|
1092
1108
|
ScenarioExecution,
|
|
1093
1109
|
ScenarioExecutionState,
|