@langwatch/scenario 0.2.13 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -67,7 +67,7 @@ __export(agents_exports, {
67
67
 
68
68
  // src/agents/judge-agent.ts
69
69
  var import_ai = require("ai");
70
- var import_zod3 = require("zod");
70
+ var import_v43 = require("zod/v4");
71
71
 
72
72
  // src/domain/index.ts
73
73
  var domain_exports = {};
@@ -85,15 +85,15 @@ __export(domain_exports, {
85
85
  });
86
86
 
87
87
  // src/domain/core/config.ts
88
- var import_zod = require("zod");
88
+ var import_v4 = require("zod/v4");
89
89
  var DEFAULT_TEMPERATURE = 0;
90
- var scenarioProjectConfigSchema = import_zod.z.object({
91
- defaultModel: import_zod.z.object({
92
- model: import_zod.z.custom(),
93
- temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
94
- maxTokens: import_zod.z.number().optional()
90
+ var scenarioProjectConfigSchema = import_v4.z.object({
91
+ defaultModel: import_v4.z.object({
92
+ model: import_v4.z.custom(),
93
+ temperature: import_v4.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
94
+ maxTokens: import_v4.z.number().optional()
95
95
  }).optional(),
96
- headless: import_zod.z.boolean().optional().default(
96
+ headless: import_v4.z.boolean().optional().default(
97
97
  typeof process !== "undefined" ? !["false", "0"].includes(process.env.SCENARIO_HEADLESS || "false") : false
98
98
  )
99
99
  }).strict();
@@ -183,7 +183,7 @@ var criterionToParamName = (criterion) => {
183
183
  };
184
184
 
185
185
  // src/config/env.ts
186
- var import_zod2 = require("zod");
186
+ var import_v42 = require("zod/v4");
187
187
 
188
188
  // src/config/log-levels.ts
189
189
  var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
@@ -196,37 +196,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
196
196
  var LOG_LEVELS = Object.values(LogLevel);
197
197
 
198
198
  // src/config/env.ts
199
- var envSchema = import_zod2.z.object({
199
+ var envSchema = import_v42.z.object({
200
200
  /**
201
201
  * LangWatch API key for event reporting.
202
202
  * If not provided, events will not be sent to LangWatch.
203
203
  */
204
- LANGWATCH_API_KEY: import_zod2.z.string().optional(),
204
+ LANGWATCH_API_KEY: import_v42.z.string().optional(),
205
205
  /**
206
206
  * LangWatch endpoint URL for event reporting.
207
207
  * Defaults to the production LangWatch endpoint.
208
208
  */
209
- LANGWATCH_ENDPOINT: import_zod2.z.string().url().optional().default("https://app.langwatch.ai"),
209
+ LANGWATCH_ENDPOINT: import_v42.z.string().url().optional().default("https://app.langwatch.ai"),
210
210
  /**
211
211
  * Disables simulation report info messages when set to any truthy value.
212
212
  * Useful for CI/CD environments or when you want cleaner output.
213
213
  */
214
- SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod2.z.string().optional().transform((val) => Boolean(val)),
214
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v42.z.string().optional().transform((val) => Boolean(val)),
215
215
  /**
216
216
  * Node environment - affects logging and behavior.
217
217
  * Defaults to 'development' if not specified.
218
218
  */
219
- NODE_ENV: import_zod2.z.enum(["development", "production", "test"]).default("development"),
219
+ NODE_ENV: import_v42.z.enum(["development", "production", "test"]).default("development"),
220
220
  /**
221
221
  * Case-insensitive log level for the scenario package.
222
222
  * Defaults to 'info' if not specified.
223
223
  */
224
- LOG_LEVEL: import_zod2.z.string().toUpperCase().pipe(import_zod2.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
224
+ LOG_LEVEL: import_v42.z.string().toUpperCase().pipe(import_v42.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
225
225
  /**
226
226
  * Scenario batch run ID.
227
227
  * If not provided, a random ID will be generated.
228
228
  */
229
- SCENARIO_BATCH_RUN_ID: import_zod2.z.string().optional()
229
+ SCENARIO_BATCH_RUN_ID: import_v42.z.string().optional()
230
230
  });
231
231
  function getEnv() {
232
232
  return envSchema.parse(process.env);
@@ -423,24 +423,24 @@ ${criteriaList}
423
423
  function buildContinueTestTool() {
424
424
  return (0, import_ai.tool)({
425
425
  description: "Continue the test with the next step",
426
- parameters: import_zod3.z.object({})
426
+ inputSchema: import_v43.z.object({})
427
427
  });
428
428
  }
429
429
  function buildFinishTestTool(criteria) {
430
430
  const criteriaNames = criteria.map(criterionToParamName);
431
431
  return (0, import_ai.tool)({
432
432
  description: "Complete the test with a final verdict",
433
- parameters: import_zod3.z.object({
434
- criteria: import_zod3.z.object(
433
+ inputSchema: import_v43.z.object({
434
+ criteria: import_v43.z.object(
435
435
  Object.fromEntries(
436
436
  criteriaNames.map((name, idx) => [
437
437
  name,
438
- import_zod3.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
438
+ import_v43.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
439
439
  ])
440
440
  )
441
441
  ).strict().describe("Strict verdict for each criterion"),
442
- reasoning: import_zod3.z.string().describe("Explanation of what the final verdict should be"),
443
- verdict: import_zod3.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
442
+ reasoning: import_v43.z.string().describe("Explanation of what the final verdict should be"),
443
+ verdict: import_v43.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
444
444
  })
445
445
  });
446
446
  }
@@ -488,7 +488,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
488
488
  model: mergedConfig.model,
489
489
  messages,
490
490
  temperature: mergedConfig.temperature ?? 0,
491
- maxTokens: mergedConfig.maxTokens,
491
+ maxOutputTokens: mergedConfig.maxTokens,
492
492
  tools,
493
493
  toolChoice
494
494
  });
@@ -497,7 +497,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
497
497
  const toolCall = completion.toolCalls[0];
498
498
  switch (toolCall.toolName) {
499
499
  case "finish_test": {
500
- args = toolCall.args;
500
+ args = toolCall.input;
501
501
  const verdict = args.verdict || "inconclusive";
502
502
  const reasoning = args.reasoning || "No reasoning provided";
503
503
  const criteria = args.criteria || {};
@@ -595,7 +595,7 @@ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
595
595
  model: mergedConfig.model,
596
596
  messages: reversedMessages,
597
597
  temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
598
- maxTokens: mergedConfig.maxTokens
598
+ maxOutputTokens: mergedConfig.maxTokens
599
599
  });
600
600
  const messageContent = completion.text;
601
601
  if (!messageContent) {
@@ -769,7 +769,7 @@ var ScenarioExecutionState = class {
769
769
 
770
770
  // src/events/schema.ts
771
771
  var import_core = require("@ag-ui/core");
772
- var import_zod4 = require("zod");
772
+ var import_zod = require("zod");
773
773
  var Verdict = /* @__PURE__ */ ((Verdict2) => {
774
774
  Verdict2["SUCCESS"] = "success";
775
775
  Verdict2["FAILURE"] = "failure";
@@ -785,64 +785,64 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
785
785
  ScenarioRunStatus2["FAILED"] = "FAILED";
786
786
  return ScenarioRunStatus2;
787
787
  })(ScenarioRunStatus || {});
788
- var baseEventSchema = import_zod4.z.object({
789
- type: import_zod4.z.nativeEnum(import_core.EventType),
790
- timestamp: import_zod4.z.number(),
791
- rawEvent: import_zod4.z.any().optional()
788
+ var baseEventSchema = import_zod.z.object({
789
+ type: import_zod.z.nativeEnum(import_core.EventType),
790
+ timestamp: import_zod.z.number(),
791
+ rawEvent: import_zod.z.any().optional()
792
792
  });
793
- var batchRunIdSchema = import_zod4.z.string();
794
- var scenarioRunIdSchema = import_zod4.z.string();
795
- var scenarioIdSchema = import_zod4.z.string();
793
+ var batchRunIdSchema = import_zod.z.string();
794
+ var scenarioRunIdSchema = import_zod.z.string();
795
+ var scenarioIdSchema = import_zod.z.string();
796
796
  var baseScenarioEventSchema = baseEventSchema.extend({
797
797
  batchRunId: batchRunIdSchema,
798
798
  scenarioId: scenarioIdSchema,
799
799
  scenarioRunId: scenarioRunIdSchema,
800
- scenarioSetId: import_zod4.z.string().optional().default("default")
800
+ scenarioSetId: import_zod.z.string().optional().default("default")
801
801
  });
802
802
  var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
803
- type: import_zod4.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
804
- metadata: import_zod4.z.object({
805
- name: import_zod4.z.string().optional(),
806
- description: import_zod4.z.string().optional()
803
+ type: import_zod.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
804
+ metadata: import_zod.z.object({
805
+ name: import_zod.z.string().optional(),
806
+ description: import_zod.z.string().optional()
807
807
  })
808
808
  });
809
- var scenarioResultsSchema = import_zod4.z.object({
810
- verdict: import_zod4.z.nativeEnum(Verdict),
811
- reasoning: import_zod4.z.string().optional(),
812
- metCriteria: import_zod4.z.array(import_zod4.z.string()),
813
- unmetCriteria: import_zod4.z.array(import_zod4.z.string()),
814
- error: import_zod4.z.string().optional()
809
+ var scenarioResultsSchema = import_zod.z.object({
810
+ verdict: import_zod.z.nativeEnum(Verdict),
811
+ reasoning: import_zod.z.string().optional(),
812
+ metCriteria: import_zod.z.array(import_zod.z.string()),
813
+ unmetCriteria: import_zod.z.array(import_zod.z.string()),
814
+ error: import_zod.z.string().optional()
815
815
  });
816
816
  var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
817
- type: import_zod4.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
818
- status: import_zod4.z.nativeEnum(ScenarioRunStatus),
817
+ type: import_zod.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
818
+ status: import_zod.z.nativeEnum(ScenarioRunStatus),
819
819
  results: scenarioResultsSchema.optional().nullable()
820
820
  });
821
821
  var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
822
822
  baseScenarioEventSchema.extend({
823
- type: import_zod4.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
823
+ type: import_zod.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
824
824
  })
825
825
  );
826
- var scenarioEventSchema = import_zod4.z.discriminatedUnion("type", [
826
+ var scenarioEventSchema = import_zod.z.discriminatedUnion("type", [
827
827
  scenarioRunStartedSchema,
828
828
  scenarioRunFinishedSchema,
829
829
  scenarioMessageSnapshotSchema
830
830
  ]);
831
- var successSchema = import_zod4.z.object({ success: import_zod4.z.boolean() });
832
- var errorSchema = import_zod4.z.object({ error: import_zod4.z.string() });
833
- var stateSchema = import_zod4.z.object({
834
- state: import_zod4.z.object({
835
- messages: import_zod4.z.array(import_zod4.z.any()),
836
- status: import_zod4.z.string()
831
+ var successSchema = import_zod.z.object({ success: import_zod.z.boolean() });
832
+ var errorSchema = import_zod.z.object({ error: import_zod.z.string() });
833
+ var stateSchema = import_zod.z.object({
834
+ state: import_zod.z.object({
835
+ messages: import_zod.z.array(import_zod.z.any()),
836
+ status: import_zod.z.string()
837
837
  })
838
838
  });
839
- var runsSchema = import_zod4.z.object({ runs: import_zod4.z.array(import_zod4.z.string()) });
840
- var eventsSchema = import_zod4.z.object({ events: import_zod4.z.array(scenarioEventSchema) });
839
+ var runsSchema = import_zod.z.object({ runs: import_zod.z.array(import_zod.z.string()) });
840
+ var eventsSchema = import_zod.z.object({ events: import_zod.z.array(scenarioEventSchema) });
841
841
 
842
842
  // src/utils/convert-core-messages-to-agui-messages.ts
843
- function convertCoreMessagesToAguiMessages(coreMessages) {
843
+ function convertModelMessagesToAguiMessages(modelMessages) {
844
844
  const aguiMessages = [];
845
- for (const msg of coreMessages) {
845
+ for (const msg of modelMessages) {
846
846
  const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
847
847
  switch (true) {
848
848
  case msg.role === "system":
@@ -886,7 +886,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
886
886
  type: "function",
887
887
  function: {
888
888
  name: c.toolName,
889
- arguments: JSON.stringify(c.args)
889
+ arguments: JSON.stringify(c.input)
890
890
  }
891
891
  }))
892
892
  });
@@ -894,11 +894,12 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
894
894
  }
895
895
  case msg.role === "tool":
896
896
  msg.content.map((p, i) => {
897
+ var _a;
897
898
  aguiMessages.push({
898
899
  id: `${id}-${i}`,
899
900
  role: "tool",
900
901
  toolCallId: p.toolCallId,
901
- content: JSON.stringify(p.result)
902
+ content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
902
903
  });
903
904
  });
904
905
  break;
@@ -908,7 +909,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
908
909
  }
909
910
  return aguiMessages;
910
911
  }
911
- var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
912
+ var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
912
913
 
913
914
  // src/execution/scenario-execution.ts
914
915
  var ScenarioExecution = class {
@@ -974,7 +975,7 @@ var ScenarioExecution = class {
974
975
  /**
975
976
  * Gets the complete conversation history as an array of messages.
976
977
  *
977
- * @returns Array of CoreMessage objects representing the full conversation
978
+ * @returns Array of ModelMessage objects representing the full conversation
978
979
  */
979
980
  get messages() {
980
981
  return this.state.messages;
@@ -1207,7 +1208,7 @@ var ScenarioExecution = class {
1207
1208
  * - "assistant" messages are routed to AGENT role agents
1208
1209
  * - Other message types are added directly to the conversation
1209
1210
  *
1210
- * @param message - The CoreMessage to add to the conversation
1211
+ * @param message - The ModelMessage to add to the conversation
1211
1212
  *
1212
1213
  * @example
1213
1214
  * ```typescript
@@ -1236,7 +1237,7 @@ var ScenarioExecution = class {
1236
1237
  *
1237
1238
  * This method is part of the ScenarioExecutionLike interface used by script steps.
1238
1239
  *
1239
- * @param content - Optional content for the user's message. Can be a string or CoreMessage.
1240
+ * @param content - Optional content for the user's message. Can be a string or ModelMessage.
1240
1241
  * If not provided, the user simulator agent will generate the content.
1241
1242
  *
1242
1243
  * @example
@@ -1247,7 +1248,7 @@ var ScenarioExecution = class {
1247
1248
  * // Let user simulator generate content
1248
1249
  * await execution.user();
1249
1250
  *
1250
- * // Use a CoreMessage object
1251
+ * // Use a ModelMessage object
1251
1252
  * await execution.user({
1252
1253
  * role: "user",
1253
1254
  * content: "Tell me a joke"
@@ -1266,7 +1267,7 @@ var ScenarioExecution = class {
1266
1267
  *
1267
1268
  * This method is part of the ScenarioExecutionLike interface used by script steps.
1268
1269
  *
1269
- * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
1270
+ * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
1270
1271
  * If not provided, the agent under test will generate the response.
1271
1272
  *
1272
1273
  * @example
@@ -1277,7 +1278,7 @@ var ScenarioExecution = class {
1277
1278
  * // Use provided content
1278
1279
  * await execution.agent("The weather is sunny today!");
1279
1280
  *
1280
- * // Use a CoreMessage object
1281
+ * // Use a ModelMessage object
1281
1282
  * await execution.agent({
1282
1283
  * role: "assistant",
1283
1284
  * content: "I'm here to help you with weather information."
@@ -1886,9 +1887,27 @@ __export(runner_exports, {
1886
1887
  var import_rxjs3 = require("rxjs");
1887
1888
 
1888
1889
  // src/events/event-alert-message-logger.ts
1890
+ var fs2 = __toESM(require("fs"));
1891
+ var os = __toESM(require("os"));
1892
+ var path2 = __toESM(require("path"));
1889
1893
  var import_open = __toESM(require("open"));
1890
- var EventAlertMessageLogger = class _EventAlertMessageLogger {
1891
- static shownBatchIds = /* @__PURE__ */ new Set();
1894
+ var EventAlertMessageLogger = class {
1895
+ /**
1896
+ * Creates a coordination file to prevent duplicate messages across processes.
1897
+ * Returns true if this process should show the message (first one to create the file).
1898
+ */
1899
+ createCoordinationFile(type) {
1900
+ try {
1901
+ const batchId = getBatchRunId();
1902
+ const tmpDir = os.tmpdir();
1903
+ const fileName = `scenario-${type}-${batchId}`;
1904
+ const filePath = path2.join(tmpDir, fileName);
1905
+ fs2.writeFileSync(filePath, process.pid.toString(), { flag: "wx" });
1906
+ return true;
1907
+ } catch {
1908
+ return false;
1909
+ }
1910
+ }
1892
1911
  /**
1893
1912
  * Shows a fancy greeting message about simulation reporting status.
1894
1913
  * Only shows once per batch run to avoid spam.
@@ -1897,10 +1916,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
1897
1916
  if (this.isGreetingDisabled()) {
1898
1917
  return;
1899
1918
  }
1900
- if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
1919
+ if (!this.createCoordinationFile("greeting")) {
1901
1920
  return;
1902
1921
  }
1903
- _EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
1904
1922
  this.displayGreeting();
1905
1923
  }
1906
1924
  /**
@@ -1911,6 +1929,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
1911
1929
  if (this.isGreetingDisabled()) {
1912
1930
  return;
1913
1931
  }
1932
+ if (!this.createCoordinationFile(`watch-${params.scenarioSetId}`)) {
1933
+ return;
1934
+ }
1914
1935
  await this.displayWatchMessage(params);
1915
1936
  }
1916
1937
  isGreetingDisabled() {
@@ -2254,14 +2275,13 @@ function formatPart(part) {
2254
2275
  case "file":
2255
2276
  return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
2256
2277
  case "tool-call":
2257
- return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
2278
+ return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
2258
2279
  case "tool-result":
2259
- return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
2280
+ return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
2260
2281
  case "reasoning":
2261
2282
  return `(reasoning): ${part.text}`;
2262
- case "redacted-reasoning":
2263
- return `(redacted reasoning): ${part.data}`;
2264
2283
  default:
2284
+ part;
2265
2285
  return `Unknown content: ${JSON.stringify(part)}`;
2266
2286
  }
2267
2287
  }
package/dist/index.mjs CHANGED
@@ -17,11 +17,11 @@ import {
17
17
  getBatchRunId,
18
18
  getProjectConfig,
19
19
  scenarioProjectConfigSchema
20
- } from "./chunk-6SKQWXT7.mjs";
20
+ } from "./chunk-3Z7E24UI.mjs";
21
21
  import {
22
22
  Logger,
23
23
  getEnv
24
- } from "./chunk-OL4RFXV4.mjs";
24
+ } from "./chunk-RHTLQKEJ.mjs";
25
25
  import {
26
26
  __export
27
27
  } from "./chunk-7P6ASYW6.mjs";
@@ -35,7 +35,7 @@ __export(agents_exports, {
35
35
 
36
36
  // src/agents/judge-agent.ts
37
37
  import { generateText, tool } from "ai";
38
- import { z } from "zod";
38
+ import { z } from "zod/v4";
39
39
 
40
40
  // src/agents/utils.ts
41
41
  var toolMessageRole = "tool";
@@ -142,14 +142,14 @@ ${criteriaList}
142
142
  function buildContinueTestTool() {
143
143
  return tool({
144
144
  description: "Continue the test with the next step",
145
- parameters: z.object({})
145
+ inputSchema: z.object({})
146
146
  });
147
147
  }
148
148
  function buildFinishTestTool(criteria) {
149
149
  const criteriaNames = criteria.map(criterionToParamName);
150
150
  return tool({
151
151
  description: "Complete the test with a final verdict",
152
- parameters: z.object({
152
+ inputSchema: z.object({
153
153
  criteria: z.object(
154
154
  Object.fromEntries(
155
155
  criteriaNames.map((name, idx) => [
@@ -207,7 +207,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
207
207
  model: mergedConfig.model,
208
208
  messages,
209
209
  temperature: mergedConfig.temperature ?? 0,
210
- maxTokens: mergedConfig.maxTokens,
210
+ maxOutputTokens: mergedConfig.maxTokens,
211
211
  tools,
212
212
  toolChoice
213
213
  });
@@ -216,7 +216,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
216
216
  const toolCall = completion.toolCalls[0];
217
217
  switch (toolCall.toolName) {
218
218
  case "finish_test": {
219
- args = toolCall.args;
219
+ args = toolCall.input;
220
220
  const verdict = args.verdict || "inconclusive";
221
221
  const reasoning = args.reasoning || "No reasoning provided";
222
222
  const criteria = args.criteria || {};
@@ -314,7 +314,7 @@ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
314
314
  model: mergedConfig.model,
315
315
  messages: reversedMessages,
316
316
  temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
317
- maxTokens: mergedConfig.maxTokens
317
+ maxOutputTokens: mergedConfig.maxTokens
318
318
  });
319
319
  const messageContent = completion.text;
320
320
  if (!messageContent) {
@@ -441,9 +441,9 @@ var ScenarioExecutionState = class {
441
441
  };
442
442
 
443
443
  // src/utils/convert-core-messages-to-agui-messages.ts
444
- function convertCoreMessagesToAguiMessages(coreMessages) {
444
+ function convertModelMessagesToAguiMessages(modelMessages) {
445
445
  const aguiMessages = [];
446
- for (const msg of coreMessages) {
446
+ for (const msg of modelMessages) {
447
447
  const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
448
448
  switch (true) {
449
449
  case msg.role === "system":
@@ -487,7 +487,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
487
487
  type: "function",
488
488
  function: {
489
489
  name: c.toolName,
490
- arguments: JSON.stringify(c.args)
490
+ arguments: JSON.stringify(c.input)
491
491
  }
492
492
  }))
493
493
  });
@@ -495,11 +495,12 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
495
495
  }
496
496
  case msg.role === "tool":
497
497
  msg.content.map((p, i) => {
498
+ var _a;
498
499
  aguiMessages.push({
499
500
  id: `${id}-${i}`,
500
501
  role: "tool",
501
502
  toolCallId: p.toolCallId,
502
- content: JSON.stringify(p.result)
503
+ content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
503
504
  });
504
505
  });
505
506
  break;
@@ -509,7 +510,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
509
510
  }
510
511
  return aguiMessages;
511
512
  }
512
- var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
513
+ var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
513
514
 
514
515
  // src/execution/scenario-execution.ts
515
516
  var ScenarioExecution = class {
@@ -575,7 +576,7 @@ var ScenarioExecution = class {
575
576
  /**
576
577
  * Gets the complete conversation history as an array of messages.
577
578
  *
578
- * @returns Array of CoreMessage objects representing the full conversation
579
+ * @returns Array of ModelMessage objects representing the full conversation
579
580
  */
580
581
  get messages() {
581
582
  return this.state.messages;
@@ -808,7 +809,7 @@ var ScenarioExecution = class {
808
809
  * - "assistant" messages are routed to AGENT role agents
809
810
  * - Other message types are added directly to the conversation
810
811
  *
811
- * @param message - The CoreMessage to add to the conversation
812
+ * @param message - The ModelMessage to add to the conversation
812
813
  *
813
814
  * @example
814
815
  * ```typescript
@@ -837,7 +838,7 @@ var ScenarioExecution = class {
837
838
  *
838
839
  * This method is part of the ScenarioExecutionLike interface used by script steps.
839
840
  *
840
- * @param content - Optional content for the user's message. Can be a string or CoreMessage.
841
+ * @param content - Optional content for the user's message. Can be a string or ModelMessage.
841
842
  * If not provided, the user simulator agent will generate the content.
842
843
  *
843
844
  * @example
@@ -848,7 +849,7 @@ var ScenarioExecution = class {
848
849
  * // Let user simulator generate content
849
850
  * await execution.user();
850
851
  *
851
- * // Use a CoreMessage object
852
+ * // Use a ModelMessage object
852
853
  * await execution.user({
853
854
  * role: "user",
854
855
  * content: "Tell me a joke"
@@ -867,7 +868,7 @@ var ScenarioExecution = class {
867
868
  *
868
869
  * This method is part of the ScenarioExecutionLike interface used by script steps.
869
870
  *
870
- * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
871
+ * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
871
872
  * If not provided, the agent under test will generate the response.
872
873
  *
873
874
  * @example
@@ -878,7 +879,7 @@ var ScenarioExecution = class {
878
879
  * // Use provided content
879
880
  * await execution.agent("The weather is sunny today!");
880
881
  *
881
- * // Use a CoreMessage object
882
+ * // Use a ModelMessage object
882
883
  * await execution.agent({
883
884
  * role: "assistant",
884
885
  * content: "I'm here to help you with weather information."
@@ -1600,14 +1601,13 @@ function formatPart(part) {
1600
1601
  case "file":
1601
1602
  return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
1602
1603
  case "tool-call":
1603
- return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
1604
+ return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
1604
1605
  case "tool-result":
1605
- return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
1606
+ return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
1606
1607
  case "reasoning":
1607
1608
  return `(reasoning): ${part.text}`;
1608
- case "redacted-reasoning":
1609
- return `(redacted reasoning): ${part.data}`;
1610
1609
  default:
1610
+ part;
1611
1611
  return `Unknown content: ${JSON.stringify(part)}`;
1612
1612
  }
1613
1613
  }
@@ -38,7 +38,7 @@ var import_path = __toESM(require("path"));
38
38
  var import_chalk = __toESM(require("chalk"));
39
39
 
40
40
  // src/config/env.ts
41
- var import_zod = require("zod");
41
+ var import_v4 = require("zod/v4");
42
42
 
43
43
  // src/config/log-levels.ts
44
44
  var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
@@ -51,37 +51,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
51
51
  var LOG_LEVELS = Object.values(LogLevel);
52
52
 
53
53
  // src/config/env.ts
54
- var envSchema = import_zod.z.object({
54
+ var envSchema = import_v4.z.object({
55
55
  /**
56
56
  * LangWatch API key for event reporting.
57
57
  * If not provided, events will not be sent to LangWatch.
58
58
  */
59
- LANGWATCH_API_KEY: import_zod.z.string().optional(),
59
+ LANGWATCH_API_KEY: import_v4.z.string().optional(),
60
60
  /**
61
61
  * LangWatch endpoint URL for event reporting.
62
62
  * Defaults to the production LangWatch endpoint.
63
63
  */
64
- LANGWATCH_ENDPOINT: import_zod.z.string().url().optional().default("https://app.langwatch.ai"),
64
+ LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
65
65
  /**
66
66
  * Disables simulation report info messages when set to any truthy value.
67
67
  * Useful for CI/CD environments or when you want cleaner output.
68
68
  */
69
- SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod.z.string().optional().transform((val) => Boolean(val)),
69
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
70
70
  /**
71
71
  * Node environment - affects logging and behavior.
72
72
  * Defaults to 'development' if not specified.
73
73
  */
74
- NODE_ENV: import_zod.z.enum(["development", "production", "test"]).default("development"),
74
+ NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
75
75
  /**
76
76
  * Case-insensitive log level for the scenario package.
77
77
  * Defaults to 'info' if not specified.
78
78
  */
79
- LOG_LEVEL: import_zod.z.string().toUpperCase().pipe(import_zod.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
79
+ LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
80
80
  /**
81
81
  * Scenario batch run ID.
82
82
  * If not provided, a random ID will be generated.
83
83
  */
84
- SCENARIO_BATCH_RUN_ID: import_zod.z.string().optional()
84
+ SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
85
85
  });
86
86
  function getEnv() {
87
87
  return envSchema.parse(process.env);
@@ -321,8 +321,12 @@ ${indent(parsedJson)}
321
321
  console.log();
322
322
  console.log(import_chalk.default.bold.cyan("=== Scenario Test Report ==="));
323
323
  console.log(`Total Scenarios: ${total}`);
324
- console.log(import_chalk.default.green(`Passed: ${passed}`));
325
- console.log(import_chalk.default.red(`Failed: ${failed}`));
324
+ console.log(
325
+ passed > 0 ? import_chalk.default.green(`Passed: ${passed}`) : `Passed: ${passed}`
326
+ );
327
+ console.log(
328
+ failed > 0 ? import_chalk.default.red(`Failed: ${failed}`) : `Failed: ${failed}`
329
+ );
326
330
  console.log(`Success Rate: ${import_chalk.default.bold(`${successRate}%`)}`);
327
331
  this.results.forEach((r, i) => {
328
332
  const statusColor = r.status === "SUCCESS" ? import_chalk.default.green : import_chalk.default.red;
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  Logger
3
- } from "../../chunk-OL4RFXV4.mjs";
3
+ } from "../../chunk-RHTLQKEJ.mjs";
4
4
  import "../../chunk-7P6ASYW6.mjs";
5
5
 
6
6
  // src/integrations/vitest/reporter.ts
@@ -161,8 +161,12 @@ ${indent(parsedJson)}
161
161
  console.log();
162
162
  console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
163
163
  console.log(`Total Scenarios: ${total}`);
164
- console.log(chalk.green(`Passed: ${passed}`));
165
- console.log(chalk.red(`Failed: ${failed}`));
164
+ console.log(
165
+ passed > 0 ? chalk.green(`Passed: ${passed}`) : `Passed: ${passed}`
166
+ );
167
+ console.log(
168
+ failed > 0 ? chalk.red(`Failed: ${failed}`) : `Failed: ${failed}`
169
+ );
166
170
  console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
167
171
  this.results.forEach((r, i) => {
168
172
  const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;