@langwatch/scenario 0.2.12 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -17,11 +17,11 @@ import {
17
17
  getBatchRunId,
18
18
  getProjectConfig,
19
19
  scenarioProjectConfigSchema
20
- } from "./chunk-7HLDX5EL.mjs";
20
+ } from "./chunk-3Z7E24UI.mjs";
21
21
  import {
22
22
  Logger,
23
23
  getEnv
24
- } from "./chunk-OL4RFXV4.mjs";
24
+ } from "./chunk-RHTLQKEJ.mjs";
25
25
  import {
26
26
  __export
27
27
  } from "./chunk-7P6ASYW6.mjs";
@@ -35,7 +35,7 @@ __export(agents_exports, {
35
35
 
36
36
  // src/agents/judge-agent.ts
37
37
  import { generateText, tool } from "ai";
38
- import { z } from "zod";
38
+ import { z } from "zod/v4";
39
39
 
40
40
  // src/agents/utils.ts
41
41
  var toolMessageRole = "tool";
@@ -142,14 +142,14 @@ ${criteriaList}
142
142
  function buildContinueTestTool() {
143
143
  return tool({
144
144
  description: "Continue the test with the next step",
145
- parameters: z.object({})
145
+ inputSchema: z.object({})
146
146
  });
147
147
  }
148
148
  function buildFinishTestTool(criteria) {
149
149
  const criteriaNames = criteria.map(criterionToParamName);
150
150
  return tool({
151
151
  description: "Complete the test with a final verdict",
152
- parameters: z.object({
152
+ inputSchema: z.object({
153
153
  criteria: z.object(
154
154
  Object.fromEntries(
155
155
  criteriaNames.map((name, idx) => [
@@ -207,7 +207,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
207
207
  model: mergedConfig.model,
208
208
  messages,
209
209
  temperature: mergedConfig.temperature ?? 0,
210
- maxTokens: mergedConfig.maxTokens,
210
+ maxOutputTokens: mergedConfig.maxTokens,
211
211
  tools,
212
212
  toolChoice
213
213
  });
@@ -216,7 +216,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
216
216
  const toolCall = completion.toolCalls[0];
217
217
  switch (toolCall.toolName) {
218
218
  case "finish_test": {
219
- args = toolCall.args;
219
+ args = toolCall.input;
220
220
  const verdict = args.verdict || "inconclusive";
221
221
  const reasoning = args.reasoning || "No reasoning provided";
222
222
  const criteria = args.criteria || {};
@@ -314,7 +314,7 @@ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
314
314
  model: mergedConfig.model,
315
315
  messages: reversedMessages,
316
316
  temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
317
- maxTokens: mergedConfig.maxTokens
317
+ maxOutputTokens: mergedConfig.maxTokens
318
318
  });
319
319
  const messageContent = completion.text;
320
320
  if (!messageContent) {
@@ -441,9 +441,9 @@ var ScenarioExecutionState = class {
441
441
  };
442
442
 
443
443
  // src/utils/convert-core-messages-to-agui-messages.ts
444
- function convertCoreMessagesToAguiMessages(coreMessages) {
444
+ function convertModelMessagesToAguiMessages(modelMessages) {
445
445
  const aguiMessages = [];
446
- for (const msg of coreMessages) {
446
+ for (const msg of modelMessages) {
447
447
  const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
448
448
  switch (true) {
449
449
  case msg.role === "system":
@@ -487,7 +487,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
487
487
  type: "function",
488
488
  function: {
489
489
  name: c.toolName,
490
- arguments: JSON.stringify(c.args)
490
+ arguments: JSON.stringify(c.input)
491
491
  }
492
492
  }))
493
493
  });
@@ -495,11 +495,12 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
495
495
  }
496
496
  case msg.role === "tool":
497
497
  msg.content.map((p, i) => {
498
+ var _a;
498
499
  aguiMessages.push({
499
500
  id: `${id}-${i}`,
500
501
  role: "tool",
501
502
  toolCallId: p.toolCallId,
502
- content: JSON.stringify(p.result)
503
+ content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
503
504
  });
504
505
  });
505
506
  break;
@@ -509,7 +510,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
509
510
  }
510
511
  return aguiMessages;
511
512
  }
512
- var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
513
+ var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
513
514
 
514
515
  // src/execution/scenario-execution.ts
515
516
  var ScenarioExecution = class {
@@ -575,7 +576,7 @@ var ScenarioExecution = class {
575
576
  /**
576
577
  * Gets the complete conversation history as an array of messages.
577
578
  *
578
- * @returns Array of CoreMessage objects representing the full conversation
579
+ * @returns Array of ModelMessage objects representing the full conversation
579
580
  */
580
581
  get messages() {
581
582
  return this.state.messages;
@@ -808,7 +809,7 @@ var ScenarioExecution = class {
808
809
  * - "assistant" messages are routed to AGENT role agents
809
810
  * - Other message types are added directly to the conversation
810
811
  *
811
- * @param message - The CoreMessage to add to the conversation
812
+ * @param message - The ModelMessage to add to the conversation
812
813
  *
813
814
  * @example
814
815
  * ```typescript
@@ -837,7 +838,7 @@ var ScenarioExecution = class {
837
838
  *
838
839
  * This method is part of the ScenarioExecutionLike interface used by script steps.
839
840
  *
840
- * @param content - Optional content for the user's message. Can be a string or CoreMessage.
841
+ * @param content - Optional content for the user's message. Can be a string or ModelMessage.
841
842
  * If not provided, the user simulator agent will generate the content.
842
843
  *
843
844
  * @example
@@ -848,7 +849,7 @@ var ScenarioExecution = class {
848
849
  * // Let user simulator generate content
849
850
  * await execution.user();
850
851
  *
851
- * // Use a CoreMessage object
852
+ * // Use a ModelMessage object
852
853
  * await execution.user({
853
854
  * role: "user",
854
855
  * content: "Tell me a joke"
@@ -867,7 +868,7 @@ var ScenarioExecution = class {
867
868
  *
868
869
  * This method is part of the ScenarioExecutionLike interface used by script steps.
869
870
  *
870
- * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
871
+ * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
871
872
  * If not provided, the agent under test will generate the response.
872
873
  *
873
874
  * @example
@@ -878,7 +879,7 @@ var ScenarioExecution = class {
878
879
  * // Use provided content
879
880
  * await execution.agent("The weather is sunny today!");
880
881
  *
881
- * // Use a CoreMessage object
882
+ * // Use a ModelMessage object
882
883
  * await execution.agent({
883
884
  * role: "assistant",
884
885
  * content: "I'm here to help you with weather information."
@@ -1600,14 +1601,13 @@ function formatPart(part) {
1600
1601
  case "file":
1601
1602
  return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
1602
1603
  case "tool-call":
1603
- return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
1604
+ return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
1604
1605
  case "tool-result":
1605
- return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
1606
+ return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
1606
1607
  case "reasoning":
1607
1608
  return `(reasoning): ${part.text}`;
1608
- case "redacted-reasoning":
1609
- return `(redacted reasoning): ${part.data}`;
1610
1609
  default:
1610
+ part;
1611
1611
  return `Unknown content: ${JSON.stringify(part)}`;
1612
1612
  }
1613
1613
  }
@@ -38,7 +38,7 @@ var import_path = __toESM(require("path"));
38
38
  var import_chalk = __toESM(require("chalk"));
39
39
 
40
40
  // src/config/env.ts
41
- var import_zod = require("zod");
41
+ var import_v4 = require("zod/v4");
42
42
 
43
43
  // src/config/log-levels.ts
44
44
  var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
@@ -51,37 +51,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
51
51
  var LOG_LEVELS = Object.values(LogLevel);
52
52
 
53
53
  // src/config/env.ts
54
- var envSchema = import_zod.z.object({
54
+ var envSchema = import_v4.z.object({
55
55
  /**
56
56
  * LangWatch API key for event reporting.
57
57
  * If not provided, events will not be sent to LangWatch.
58
58
  */
59
- LANGWATCH_API_KEY: import_zod.z.string().optional(),
59
+ LANGWATCH_API_KEY: import_v4.z.string().optional(),
60
60
  /**
61
61
  * LangWatch endpoint URL for event reporting.
62
62
  * Defaults to the production LangWatch endpoint.
63
63
  */
64
- LANGWATCH_ENDPOINT: import_zod.z.string().url().optional().default("https://app.langwatch.ai"),
64
+ LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
65
65
  /**
66
66
  * Disables simulation report info messages when set to any truthy value.
67
67
  * Useful for CI/CD environments or when you want cleaner output.
68
68
  */
69
- SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod.z.string().optional().transform((val) => Boolean(val)),
69
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
70
70
  /**
71
71
  * Node environment - affects logging and behavior.
72
72
  * Defaults to 'development' if not specified.
73
73
  */
74
- NODE_ENV: import_zod.z.enum(["development", "production", "test"]).default("development"),
74
+ NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
75
75
  /**
76
76
  * Case-insensitive log level for the scenario package.
77
77
  * Defaults to 'info' if not specified.
78
78
  */
79
- LOG_LEVEL: import_zod.z.string().toUpperCase().pipe(import_zod.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
79
+ LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
80
80
  /**
81
81
  * Scenario batch run ID.
82
82
  * If not provided, a random ID will be generated.
83
83
  */
84
- SCENARIO_BATCH_RUN_ID: import_zod.z.string().optional()
84
+ SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
85
85
  });
86
86
  function getEnv() {
87
87
  return envSchema.parse(process.env);
@@ -321,8 +321,12 @@ ${indent(parsedJson)}
321
321
  console.log();
322
322
  console.log(import_chalk.default.bold.cyan("=== Scenario Test Report ==="));
323
323
  console.log(`Total Scenarios: ${total}`);
324
- console.log(import_chalk.default.green(`Passed: ${passed}`));
325
- console.log(import_chalk.default.red(`Failed: ${failed}`));
324
+ console.log(
325
+ passed > 0 ? import_chalk.default.green(`Passed: ${passed}`) : `Passed: ${passed}`
326
+ );
327
+ console.log(
328
+ failed > 0 ? import_chalk.default.red(`Failed: ${failed}`) : `Failed: ${failed}`
329
+ );
326
330
  console.log(`Success Rate: ${import_chalk.default.bold(`${successRate}%`)}`);
327
331
  this.results.forEach((r, i) => {
328
332
  const statusColor = r.status === "SUCCESS" ? import_chalk.default.green : import_chalk.default.red;
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  Logger
3
- } from "../../chunk-OL4RFXV4.mjs";
3
+ } from "../../chunk-RHTLQKEJ.mjs";
4
4
  import "../../chunk-7P6ASYW6.mjs";
5
5
 
6
6
  // src/integrations/vitest/reporter.ts
@@ -161,8 +161,12 @@ ${indent(parsedJson)}
161
161
  console.log();
162
162
  console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
163
163
  console.log(`Total Scenarios: ${total}`);
164
- console.log(chalk.green(`Passed: ${passed}`));
165
- console.log(chalk.red(`Failed: ${failed}`));
164
+ console.log(
165
+ passed > 0 ? chalk.green(`Passed: ${passed}`) : `Passed: ${passed}`
166
+ );
167
+ console.log(
168
+ failed > 0 ? chalk.red(`Failed: ${failed}`) : `Failed: ${failed}`
169
+ );
166
170
  console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
167
171
  this.results.forEach((r, i) => {
168
172
  const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
@@ -25,6 +25,6 @@ __export(setup_global_exports, {
25
25
  module.exports = __toCommonJS(setup_global_exports);
26
26
  var import_xksuid = require("xksuid");
27
27
  function setup() {
28
- const scenarioBatchRunId = `scenariobatchrun_${(0, import_xksuid.generate)()}`;
28
+ const scenarioBatchRunId = `scenariobatch_${(0, import_xksuid.generate)()}`;
29
29
  process.env.SCENARIO_BATCH_RUN_ID = scenarioBatchRunId;
30
30
  }
@@ -3,7 +3,7 @@ import "../../chunk-7P6ASYW6.mjs";
3
3
  // src/integrations/vitest/setup-global.ts
4
4
  import { generate } from "xksuid";
5
5
  function setup() {
6
- const scenarioBatchRunId = `scenariobatchrun_${generate()}`;
6
+ const scenarioBatchRunId = `scenariobatch_${generate()}`;
7
7
  process.env.SCENARIO_BATCH_RUN_ID = scenarioBatchRunId;
8
8
  }
9
9
  export {