@langwatch/scenario 0.2.9 → 0.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,186 @@
1
1
  import {
2
- VitestReporter
3
- } from "../../chunk-K7KLHTDI.mjs";
4
- import "../../chunk-YPJZSK4J.mjs";
2
+ Logger
3
+ } from "../../chunk-OL4RFXV4.mjs";
5
4
  import "../../chunk-7P6ASYW6.mjs";
5
+
6
+ // src/integrations/vitest/reporter.ts
7
+ import fs from "fs";
8
+ import path from "path";
9
+ import chalk from "chalk";
10
+ var logger = Logger.create("integrations:vitest:reporter");
11
+ function getProjectRoot() {
12
+ return process.cwd();
13
+ }
14
+ var projectRoot = getProjectRoot();
15
+ var logDir = path.join(projectRoot, ".scenario");
16
+ if (!fs.existsSync(logDir)) fs.mkdirSync(logDir);
17
+ function getLogFilePath(testId) {
18
+ return path.join(logDir, `${testId}.log`);
19
+ }
20
+ function getFullTestName(task) {
21
+ let name = task.name;
22
+ let parent = task.suite;
23
+ while (parent) {
24
+ name = `${parent.name} > ${name}`;
25
+ parent = parent.suite;
26
+ }
27
+ return name;
28
+ }
29
+ function indent(str, n = 2) {
30
+ return str.replace(/^/gm, " ".repeat(n));
31
+ }
32
+ var VitestReporter = class {
33
+ results = [];
34
+ async onTestCaseResult(test) {
35
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
36
+ const fullName = getFullTestName(test);
37
+ const filePath = getLogFilePath(test.id);
38
+ if (!fs.existsSync(filePath)) {
39
+ logger.warn(
40
+ `No log file found ${filePath} for test ${fullName}`,
41
+ test.id
42
+ );
43
+ return;
44
+ }
45
+ const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
46
+ const events = lines.map((line) => JSON.parse(line));
47
+ const runs = /* @__PURE__ */ new Map();
48
+ for (const event of events) {
49
+ const runId = event.scenarioRunId ?? "unknown";
50
+ if (!runs.has(runId)) runs.set(runId, []);
51
+ runs.get(runId).push(event);
52
+ }
53
+ for (const [runId, runEvents] of Array.from(runs.entries())) {
54
+ const started = runEvents.find(
55
+ (e) => e.type === "SCENARIO_RUN_STARTED"
56
+ );
57
+ const finished = runEvents.find(
58
+ (e) => e.type === "SCENARIO_RUN_FINISHED"
59
+ );
60
+ const messages = runEvents.filter(
61
+ (e) => e.type === "SCENARIO_MESSAGE_SNAPSHOT"
62
+ );
63
+ this.results.push({
64
+ name: ((_a = started == null ? void 0 : started.metadata) == null ? void 0 : _a.name) ?? fullName,
65
+ status: (finished == null ? void 0 : finished.status) ?? "UNKNOWN",
66
+ duration: started && finished ? finished.timestamp - started.timestamp : 0,
67
+ reasoning: (_b = finished == null ? void 0 : finished.results) == null ? void 0 : _b.reasoning,
68
+ criteria: (finished == null ? void 0 : finished.results) ? `Success Criteria: ${((_c = finished.results.metCriteria) == null ? void 0 : _c.length) ?? 0}/${(((_d = finished.results.metCriteria) == null ? void 0 : _d.length) ?? 0) + (((_e = finished.results.unmetCriteria) == null ? void 0 : _e.length) ?? 0)}` : void 0
69
+ });
70
+ console.log(
71
+ `
72
+ --- Scenario Run: ${((_f = started == null ? void 0 : started.metadata) == null ? void 0 : _f.name) ?? runId} ---`
73
+ );
74
+ if (started) {
75
+ console.log(`Description: ${((_g = started.metadata) == null ? void 0 : _g.description) ?? ""}`);
76
+ }
77
+ if (messages.length) {
78
+ console.log("Chat log:\n");
79
+ let lastMessageCount = 0;
80
+ for (const msg of messages) {
81
+ const allMessages = msg.messages ?? [];
82
+ for (const m of allMessages.slice(lastMessageCount)) {
83
+ const role = m.role;
84
+ if (role.toLowerCase() === "assistant" && "toolCalls" in m && Array.isArray(m.toolCalls) && m.toolCalls.length > 0) {
85
+ for (const toolCall of m.toolCalls) {
86
+ const functionName = toolCall.function.name;
87
+ let parsedJson = "";
88
+ try {
89
+ parsedJson = JSON.stringify(
90
+ JSON.parse(toolCall.function.arguments),
91
+ null,
92
+ 2
93
+ );
94
+ } catch {
95
+ parsedJson = toolCall.function.arguments;
96
+ }
97
+ const role2 = chalk.magenta(`ToolCall(${functionName}):`);
98
+ console.log(`${role2}:
99
+
100
+ ${indent(parsedJson)}
101
+ `);
102
+ }
103
+ continue;
104
+ }
105
+ let roleLabel = role;
106
+ if (role.toLowerCase() === "user") roleLabel = chalk.green("User");
107
+ else if (role.toLowerCase() === "agent")
108
+ roleLabel = chalk.cyan("Agent");
109
+ else if (role.toLowerCase() === "assistant")
110
+ if (Array.isArray(m.content) && typeof m.content.at(0) === "object" && ((_h = m.content.at(0)) == null ? void 0 : _h.type) === "tool-call")
111
+ roleLabel = chalk.cyan("ToolCall");
112
+ else roleLabel = chalk.cyan("Assistant");
113
+ else if (role.toLowerCase() === "tool") {
114
+ roleLabel = chalk.magenta("ToolResult");
115
+ let parsedJson = "";
116
+ try {
117
+ parsedJson = JSON.stringify(JSON.parse(m.content), null, 2);
118
+ } catch {
119
+ parsedJson = m.content;
120
+ }
121
+ console.log(`${roleLabel}:
122
+
123
+ ${indent(parsedJson)}
124
+ `);
125
+ continue;
126
+ } else roleLabel = chalk.yellow(role);
127
+ console.log(`${roleLabel}: ${m.content}`);
128
+ }
129
+ lastMessageCount = allMessages.length;
130
+ }
131
+ }
132
+ if (finished) {
133
+ console.log("--- Verdict ---");
134
+ console.log(`Status: ${finished.status}`);
135
+ if (finished.results) {
136
+ console.log(`Verdict: ${finished.results.verdict}`);
137
+ if (finished.results.reasoning)
138
+ console.log(`Reasoning: ${finished.results.reasoning}`);
139
+ if ((_i = finished.results.metCriteria) == null ? void 0 : _i.length)
140
+ console.log(
141
+ `Met criteria: ${finished.results.metCriteria.join(", ")}`
142
+ );
143
+ if ((_j = finished.results.unmetCriteria) == null ? void 0 : _j.length)
144
+ console.log(
145
+ `Unmet criteria: ${finished.results.unmetCriteria.join(", ")}`
146
+ );
147
+ if (finished.results.error)
148
+ console.log(`Error: ${finished.results.error}`);
149
+ }
150
+ }
151
+ console.log("-----------------------------\n");
152
+ }
153
+ fs.unlinkSync(filePath);
154
+ }
155
+ async onTestRunEnd() {
156
+ if (this.results.length === 0) return;
157
+ const total = this.results.length;
158
+ const passed = this.results.filter((r) => r.status === "SUCCESS").length;
159
+ const failed = this.results.filter((r) => r.status !== "SUCCESS").length;
160
+ const successRate = (passed / total * 100).toFixed(1);
161
+ console.log();
162
+ console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
163
+ console.log(`Total Scenarios: ${total}`);
164
+ console.log(chalk.green(`Passed: ${passed}`));
165
+ console.log(chalk.red(`Failed: ${failed}`));
166
+ console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
167
+ this.results.forEach((r, i) => {
168
+ const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
169
+ console.log();
170
+ console.log(
171
+ `${i + 1}. ${r.name} - ${statusColor(r.status)} in ${(r.duration / 1e3).toFixed(2)}s`
172
+ );
173
+ if (r.reasoning) {
174
+ console.log(chalk.greenBright(" Reasoning: ") + r.reasoning);
175
+ }
176
+ if (r.criteria) {
177
+ console.log(chalk.bold(" " + r.criteria));
178
+ }
179
+ });
180
+ console.log();
181
+ }
182
+ };
183
+ var reporter_default = VitestReporter;
6
184
  export {
7
- VitestReporter as default
185
+ reporter_default as default
8
186
  };
@@ -41,6 +41,7 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
41
41
  LogLevel2["DEBUG"] = "DEBUG";
42
42
  return LogLevel2;
43
43
  })(LogLevel || {});
44
+ var LOG_LEVELS = Object.values(LogLevel);
44
45
 
45
46
  // src/config/env.ts
46
47
  var envSchema = import_zod.z.object({
@@ -53,7 +54,7 @@ var envSchema = import_zod.z.object({
53
54
  * LangWatch endpoint URL for event reporting.
54
55
  * Defaults to the production LangWatch endpoint.
55
56
  */
56
- LANGWATCH_ENDPOINT: import_zod.z.string().url().default("https://app.langwatch.ai"),
57
+ LANGWATCH_ENDPOINT: import_zod.z.string().url().optional().default("https://app.langwatch.ai"),
57
58
  /**
58
59
  * Disables simulation report info messages when set to any truthy value.
59
60
  * Useful for CI/CD environments or when you want cleaner output.
@@ -65,17 +66,19 @@ var envSchema = import_zod.z.object({
65
66
  */
66
67
  NODE_ENV: import_zod.z.enum(["development", "production", "test"]).default("development"),
67
68
  /**
68
- * Log level for the scenario package.
69
+ * Case-insensitive log level for the scenario package.
69
70
  * Defaults to 'info' if not specified.
70
71
  */
71
- LOG_LEVEL: import_zod.z.nativeEnum(LogLevel).optional(),
72
+ LOG_LEVEL: import_zod.z.string().toUpperCase().pipe(import_zod.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
72
73
  /**
73
74
  * Scenario batch run ID.
74
75
  * If not provided, a random ID will be generated.
75
76
  */
76
77
  SCENARIO_BATCH_RUN_ID: import_zod.z.string().optional()
77
78
  });
78
- var env = envSchema.parse(process.env);
79
+ function getEnv() {
80
+ return envSchema.parse(process.env);
81
+ }
79
82
 
80
83
  // src/config/load.ts
81
84
  var import_promises = __toESM(require("fs/promises"));
@@ -104,18 +107,27 @@ var Logger = class _Logger {
104
107
  static create(context) {
105
108
  return new _Logger(context);
106
109
  }
107
- getLogLevel() {
108
- return env.LOG_LEVEL ?? "INFO" /* INFO */;
110
+ /**
111
+ * Returns the current log level from environment.
112
+ * Uses a getter for clarity and idiomatic usage.
113
+ */
114
+ get LOG_LEVEL() {
115
+ return getEnv().LOG_LEVEL;
109
116
  }
110
- getLogLevelIndex(level) {
111
- return Object.values(LogLevel).indexOf(level);
117
+ /**
118
+ * Returns the index of the given log level in the LOG_LEVELS array.
119
+ * @param level - The log level to get the index for.
120
+ * @returns The index of the log level in the LOG_LEVELS array.
121
+ */
122
+ getLogLevelIndexFor(level) {
123
+ return LOG_LEVELS.indexOf(level);
112
124
  }
113
125
  /**
114
126
  * Checks if logging should occur based on LOG_LEVEL env var
115
127
  */
116
128
  shouldLog(level) {
117
- const currentLevelIndex = this.getLogLevelIndex(this.getLogLevel());
118
- const requestedLevelIndex = this.getLogLevelIndex(level);
129
+ const currentLevelIndex = this.getLogLevelIndexFor(this.LOG_LEVEL);
130
+ const requestedLevelIndex = this.getLogLevelIndexFor(level);
119
131
  return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
120
132
  }
121
133
  formatMessage(message) {
@@ -176,7 +188,6 @@ function getBatchRunId() {
176
188
  return batchRunId;
177
189
  }
178
190
  if (import_node_process.default.env.SCENARIO_BATCH_RUN_ID) {
179
- console.log("process.env.SCENARIO_BATCH_RUN_ID", import_node_process.default.env.SCENARIO_BATCH_RUN_ID);
180
191
  return batchRunId = import_node_process.default.env.SCENARIO_BATCH_RUN_ID;
181
192
  }
182
193
  if (import_node_process.default.env.VITEST_WORKER_ID || import_node_process.default.env.JEST_WORKER_ID) {
@@ -227,10 +238,11 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
227
238
  this.displayWatchMessage(params);
228
239
  }
229
240
  isGreetingDisabled() {
230
- return env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
241
+ return getEnv().SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
231
242
  }
232
243
  displayGreeting() {
233
244
  const separator = "\u2500".repeat(60);
245
+ const env = getEnv();
234
246
  if (!env.LANGWATCH_API_KEY) {
235
247
  console.log(`
236
248
  ${separator}`);
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  EventBus
3
- } from "../../chunk-7H6OGEQ5.mjs";
3
+ } from "../../chunk-7HLDX5EL.mjs";
4
4
  import {
5
5
  Logger
6
- } from "../../chunk-YPJZSK4J.mjs";
6
+ } from "../../chunk-OL4RFXV4.mjs";
7
7
  import "../../chunk-7P6ASYW6.mjs";
8
8
 
9
9
  // src/integrations/vitest/setup.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@langwatch/scenario",
3
- "version": "0.2.9",
3
+ "version": "0.2.12",
4
4
  "description": "A TypeScript library for testing AI agents using scenarios",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -28,34 +28,33 @@
28
28
  "pnpm": ">=8"
29
29
  },
30
30
  "dependencies": {
31
- "@ag-ui/core": "0.0.28",
32
- "@ai-sdk/openai": "1.3.22",
31
+ "@ag-ui/core": "^0.0.28",
32
+ "@ai-sdk/openai": "^1.3.22",
33
33
  "ai": ">=4.0.0",
34
- "chalk": "5.4.1",
35
- "rxjs": "7.8.2",
36
- "stringify": "5.2.0",
37
- "xksuid": "0.0.4",
38
- "zod": "3.24.4"
34
+ "chalk": "^5.4.1",
35
+ "rxjs": "^7.8.2",
36
+ "xksuid": "^0.0.4",
37
+ "zod": "^3.24.4"
39
38
  },
40
39
  "devDependencies": {
41
- "@eslint/js": "9.26.0",
40
+ "@eslint/js": "^9.26.0",
42
41
  "@types/jest": "^29.0.0",
43
- "@types/node": "22.15.15",
44
- "@typescript-eslint/parser": "8.32.0",
45
- "@typescript/native-preview": "7.0.0-dev.20250617.1",
46
- "dotenv": "16.5.0",
47
- "eslint": "9.26.0",
48
- "eslint-import-resolver-typescript": "4.3.4",
49
- "eslint-plugin-import": "2.31.0",
50
- "eslint-plugin-unused-imports": "4.1.4",
51
- "globals": "16.1.0",
42
+ "@types/node": "^22.15.15",
43
+ "@typescript-eslint/parser": "^8.32.0",
44
+ "@typescript/native-preview": "^7.0.0-dev.20250617.1",
45
+ "dotenv": "^16.5.0",
46
+ "eslint": "^9.26.0",
47
+ "eslint-import-resolver-typescript": "^4.3.4",
48
+ "eslint-plugin-import": "^2.31.0",
49
+ "eslint-plugin-unused-imports": "^4.1.4",
50
+ "globals": "^16.1.0",
52
51
  "jest": "^29.0.0",
53
52
  "ts-jest": "^29.0.0",
54
- "tsup": "8.4.0",
55
- "tsx": "4.19.4",
53
+ "tsup": "^8.4.0",
54
+ "tsx": "^4.19.4",
56
55
  "typescript": "^5.0.0",
57
- "typescript-eslint": "8.32.0",
58
- "vitest": "3.2.4"
56
+ "typescript-eslint": "^8.32.0",
57
+ "vitest": "^3.2.4"
59
58
  },
60
59
  "exports": {
61
60
  ".": {
@@ -1,146 +0,0 @@
1
- import {
2
- Logger
3
- } from "./chunk-YPJZSK4J.mjs";
4
-
5
- // src/integrations/vitest/reporter.ts
6
- import fs from "fs";
7
- import path from "path";
8
- import chalk from "chalk";
9
- var logger = Logger.create("integrations:vitest:reporter");
10
- function getProjectRoot() {
11
- return process.cwd();
12
- }
13
- var projectRoot = getProjectRoot();
14
- var logDir = path.join(projectRoot, ".scenario");
15
- if (!fs.existsSync(logDir)) fs.mkdirSync(logDir);
16
- function getLogFilePath(testId) {
17
- return path.join(logDir, `${testId}.log`);
18
- }
19
- function getFullTestName(task) {
20
- let name = task.name;
21
- let parent = task.suite;
22
- while (parent) {
23
- name = `${parent.name} > ${name}`;
24
- parent = parent.suite;
25
- }
26
- return name;
27
- }
28
- var VitestReporter = class {
29
- results = [];
30
- async onTestCaseResult(test) {
31
- var _a, _b, _c, _d, _e, _f, _g, _h, _i;
32
- const fullName = getFullTestName(test);
33
- const filePath = getLogFilePath(test.id);
34
- if (!fs.existsSync(filePath)) {
35
- logger.warn(
36
- `No log file found ${filePath} for test ${fullName}`,
37
- test.id
38
- );
39
- return;
40
- }
41
- const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
42
- const events = lines.map((line) => JSON.parse(line));
43
- const runs = /* @__PURE__ */ new Map();
44
- for (const event of events) {
45
- const runId = event.scenarioRunId ?? "unknown";
46
- if (!runs.has(runId)) runs.set(runId, []);
47
- runs.get(runId).push(event);
48
- }
49
- for (const [runId, runEvents] of Array.from(runs.entries())) {
50
- const started = runEvents.find(
51
- (e) => e.type === "SCENARIO_RUN_STARTED"
52
- );
53
- const finished = runEvents.find(
54
- (e) => e.type === "SCENARIO_RUN_FINISHED"
55
- );
56
- const messages = runEvents.filter(
57
- (e) => e.type === "SCENARIO_MESSAGE_SNAPSHOT"
58
- );
59
- this.results.push({
60
- name: ((_a = started == null ? void 0 : started.metadata) == null ? void 0 : _a.name) ?? fullName,
61
- status: (finished == null ? void 0 : finished.status) ?? "UNKNOWN",
62
- duration: started && finished ? finished.timestamp - started.timestamp : 0,
63
- reasoning: (_b = finished == null ? void 0 : finished.results) == null ? void 0 : _b.reasoning,
64
- criteria: (finished == null ? void 0 : finished.results) ? `Success Criteria: ${((_c = finished.results.metCriteria) == null ? void 0 : _c.length) ?? 0}/${(((_d = finished.results.metCriteria) == null ? void 0 : _d.length) ?? 0) + (((_e = finished.results.unmetCriteria) == null ? void 0 : _e.length) ?? 0)}` : void 0
65
- });
66
- console.log(
67
- `
68
- --- Scenario Run: ${((_f = started == null ? void 0 : started.metadata) == null ? void 0 : _f.name) ?? runId} ---`
69
- );
70
- if (started) {
71
- console.log(`Description: ${((_g = started.metadata) == null ? void 0 : _g.description) ?? ""}`);
72
- }
73
- if (messages.length) {
74
- console.log("Chat log:");
75
- let lastMessageCount = 0;
76
- for (const msg of messages) {
77
- const allMessages = msg.messages ?? [];
78
- for (const m of allMessages.slice(lastMessageCount)) {
79
- const role = m.role;
80
- let roleLabel = role;
81
- if (role.toLowerCase() === "user") roleLabel = chalk.green("User");
82
- else if (role.toLowerCase() === "agent")
83
- roleLabel = chalk.cyan("Agent");
84
- else if (role.toLowerCase() === "assistant")
85
- roleLabel = chalk.cyan("Assistant");
86
- else roleLabel = chalk.yellow(role);
87
- console.log(`${roleLabel}: ${m.content}`);
88
- }
89
- lastMessageCount = allMessages.length;
90
- }
91
- }
92
- if (finished) {
93
- console.log("--- Verdict ---");
94
- console.log(`Status: ${finished.status}`);
95
- if (finished.results) {
96
- console.log(`Verdict: ${finished.results.verdict}`);
97
- if (finished.results.reasoning)
98
- console.log(`Reasoning: ${finished.results.reasoning}`);
99
- if ((_h = finished.results.metCriteria) == null ? void 0 : _h.length)
100
- console.log(
101
- `Met criteria: ${finished.results.metCriteria.join(", ")}`
102
- );
103
- if ((_i = finished.results.unmetCriteria) == null ? void 0 : _i.length)
104
- console.log(
105
- `Unmet criteria: ${finished.results.unmetCriteria.join(", ")}`
106
- );
107
- if (finished.results.error)
108
- console.log(`Error: ${finished.results.error}`);
109
- }
110
- }
111
- console.log("-----------------------------\n");
112
- }
113
- fs.unlinkSync(filePath);
114
- }
115
- async onTestRunEnd() {
116
- if (this.results.length === 0) return;
117
- const total = this.results.length;
118
- const passed = this.results.filter((r) => r.status === "SUCCESS").length;
119
- const failed = this.results.filter((r) => r.status !== "SUCCESS").length;
120
- const successRate = (passed / total * 100).toFixed(1);
121
- console.log();
122
- console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
123
- console.log(`Total Scenarios: ${total}`);
124
- console.log(chalk.green(`Passed: ${passed}`));
125
- console.log(chalk.red(`Failed: ${failed}`));
126
- console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
127
- this.results.forEach((r, i) => {
128
- const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
129
- console.log();
130
- console.log(
131
- `${i + 1}. ${r.name} - ${statusColor(r.status)} in ${(r.duration / 1e3).toFixed(2)}s`
132
- );
133
- if (r.reasoning) {
134
- console.log(chalk.greenBright(" Reasoning: ") + r.reasoning);
135
- }
136
- if (r.criteria) {
137
- console.log(chalk.bold(" " + r.criteria));
138
- }
139
- });
140
- console.log();
141
- }
142
- };
143
-
144
- export {
145
- VitestReporter
146
- };