@langwatch/scenario 0.2.6 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/{chunk-MOOKAYIE.mjs → chunk-7H6OGEQ5.mjs} +76 -162
- package/dist/chunk-K7KLHTDI.mjs +146 -0
- package/dist/chunk-YPJZSK4J.mjs +121 -0
- package/dist/index.d.mts +9 -3
- package/dist/index.d.ts +9 -3
- package/dist/index.js +100 -65
- package/dist/index.mjs +16 -8
- package/dist/integrations/vitest/config.d.mts +5 -0
- package/dist/integrations/vitest/config.d.ts +5 -0
- package/dist/integrations/vitest/config.js +324 -0
- package/dist/integrations/vitest/config.mjs +35 -0
- package/dist/integrations/vitest/reporter.js +124 -1
- package/dist/integrations/vitest/reporter.mjs +4 -135
- package/dist/integrations/vitest/setup-global.d.mts +3 -0
- package/dist/integrations/vitest/setup-global.d.ts +3 -0
- package/dist/integrations/vitest/setup-global.js +30 -0
- package/dist/integrations/vitest/setup-global.mjs +11 -0
- package/dist/integrations/vitest/setup.js +97 -67
- package/dist/integrations/vitest/setup.mjs +7 -3
- package/package.json +11 -1
package/README.md
CHANGED
|
@@ -97,7 +97,7 @@ describe("Weather Agent", () => {
|
|
|
97
97
|
role: AgentRole.AGENT,
|
|
98
98
|
call: async (input) => {
|
|
99
99
|
const response = await generateText({
|
|
100
|
-
model: openai("gpt-4.1
|
|
100
|
+
model: openai("gpt-4.1"),
|
|
101
101
|
system: `You are a helpful assistant that may help the user with weather information.`,
|
|
102
102
|
messages: input.messages,
|
|
103
103
|
tools: { get_current_weather: getCurrentWeather },
|
|
@@ -122,7 +122,7 @@ describe("Weather Agent", () => {
|
|
|
122
122
|
description: "The user asks for the weather in a specific city, and the agent should use the weather tool to find it.",
|
|
123
123
|
agents: [
|
|
124
124
|
weatherAgent,
|
|
125
|
-
scenario.userSimulatorAgent({ model: openai("gpt-4.1
|
|
125
|
+
scenario.userSimulatorAgent({ model: openai("gpt-4.1") }),
|
|
126
126
|
],
|
|
127
127
|
script: [
|
|
128
128
|
scenario.user("What's the weather like in Barcelona?"),
|
|
@@ -240,7 +240,7 @@ The following configuration options are all optional. You can specify any combin
|
|
|
240
240
|
|
|
241
241
|
You can control the library's behavior with the following environment variables:
|
|
242
242
|
|
|
243
|
-
- `
|
|
243
|
+
- `LOG_LEVEL`: Sets the verbosity of the internal logger. Can be `error`, `warn`, `info`, or `debug`. By default, logging is silent.
|
|
244
244
|
- `SCENARIO_DISABLE_SIMULATION_REPORT_INFO`: Set to `true` to disable the "Scenario Simulation Reporting" banner that is printed to the console when a test run starts.
|
|
245
245
|
- `LANGWATCH_API_KEY`: Your LangWatch API key. This is used as a fallback if `langwatchApiKey` is not set in your config file.
|
|
246
246
|
- `LANGWATCH_ENDPOINT`: The LangWatch reporting endpoint. This is used as a fallback if `langwatchEndpoint` is not set in your config file.
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Logger,
|
|
3
|
+
env
|
|
4
|
+
} from "./chunk-YPJZSK4J.mjs";
|
|
1
5
|
import {
|
|
2
6
|
__export
|
|
3
7
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -105,124 +109,7 @@ async function loadScenarioProjectConfig() {
|
|
|
105
109
|
return await scenarioProjectConfigSchema.parseAsync({});
|
|
106
110
|
}
|
|
107
111
|
|
|
108
|
-
// src/
|
|
109
|
-
var Logger = class _Logger {
|
|
110
|
-
constructor(context) {
|
|
111
|
-
this.context = context;
|
|
112
|
-
}
|
|
113
|
-
/**
|
|
114
|
-
* Creates a logger with context (e.g., class name)
|
|
115
|
-
*/
|
|
116
|
-
static create(context) {
|
|
117
|
-
return new _Logger(context);
|
|
118
|
-
}
|
|
119
|
-
getLogLevel() {
|
|
120
|
-
return env.SCENARIO_LOG_LEVEL ?? "INFO" /* INFO */;
|
|
121
|
-
}
|
|
122
|
-
getLogLevelIndex(level) {
|
|
123
|
-
return Object.values(LogLevel).indexOf(level);
|
|
124
|
-
}
|
|
125
|
-
/**
|
|
126
|
-
* Checks if logging should occur based on LOG_LEVEL env var
|
|
127
|
-
*/
|
|
128
|
-
shouldLog(level) {
|
|
129
|
-
const currentLevelIndex = this.getLogLevelIndex(this.getLogLevel());
|
|
130
|
-
const requestedLevelIndex = this.getLogLevelIndex(level);
|
|
131
|
-
return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
|
|
132
|
-
}
|
|
133
|
-
formatMessage(message) {
|
|
134
|
-
return this.context ? `[${this.context}] ${message}` : message;
|
|
135
|
-
}
|
|
136
|
-
error(message, data) {
|
|
137
|
-
if (this.shouldLog("ERROR" /* ERROR */)) {
|
|
138
|
-
const formattedMessage = this.formatMessage(message);
|
|
139
|
-
if (data) {
|
|
140
|
-
console.error(formattedMessage, data);
|
|
141
|
-
} else {
|
|
142
|
-
console.error(formattedMessage);
|
|
143
|
-
}
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
warn(message, data) {
|
|
147
|
-
if (this.shouldLog("WARN" /* WARN */)) {
|
|
148
|
-
const formattedMessage = this.formatMessage(message);
|
|
149
|
-
if (data) {
|
|
150
|
-
console.warn(formattedMessage, data);
|
|
151
|
-
} else {
|
|
152
|
-
console.warn(formattedMessage);
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
info(message, data) {
|
|
157
|
-
if (this.shouldLog("INFO" /* INFO */)) {
|
|
158
|
-
const formattedMessage = this.formatMessage(message);
|
|
159
|
-
if (data) {
|
|
160
|
-
console.info(formattedMessage, data);
|
|
161
|
-
} else {
|
|
162
|
-
console.info(formattedMessage);
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
debug(message, data) {
|
|
167
|
-
if (this.shouldLog("DEBUG" /* DEBUG */)) {
|
|
168
|
-
const formattedMessage = this.formatMessage(message);
|
|
169
|
-
if (data) {
|
|
170
|
-
console.log(formattedMessage, data);
|
|
171
|
-
} else {
|
|
172
|
-
console.log(formattedMessage);
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
// src/config/env.ts
|
|
179
|
-
import { z as z2 } from "zod";
|
|
180
|
-
|
|
181
|
-
// src/config/log-levels.ts
|
|
182
|
-
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
183
|
-
LogLevel2["ERROR"] = "ERROR";
|
|
184
|
-
LogLevel2["WARN"] = "WARN";
|
|
185
|
-
LogLevel2["INFO"] = "INFO";
|
|
186
|
-
LogLevel2["DEBUG"] = "DEBUG";
|
|
187
|
-
return LogLevel2;
|
|
188
|
-
})(LogLevel || {});
|
|
189
|
-
|
|
190
|
-
// src/config/env.ts
|
|
191
|
-
var envSchema = z2.object({
|
|
192
|
-
/**
|
|
193
|
-
* LangWatch API key for event reporting.
|
|
194
|
-
* If not provided, events will not be sent to LangWatch.
|
|
195
|
-
*/
|
|
196
|
-
LANGWATCH_API_KEY: z2.string().optional(),
|
|
197
|
-
/**
|
|
198
|
-
* LangWatch endpoint URL for event reporting.
|
|
199
|
-
* Defaults to the production LangWatch endpoint.
|
|
200
|
-
*/
|
|
201
|
-
LANGWATCH_ENDPOINT: z2.string().url().default("https://app.langwatch.ai"),
|
|
202
|
-
/**
|
|
203
|
-
* Disables simulation report info messages when set to any truthy value.
|
|
204
|
-
* Useful for CI/CD environments or when you want cleaner output.
|
|
205
|
-
*/
|
|
206
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z2.string().optional().transform((val) => Boolean(val)),
|
|
207
|
-
/**
|
|
208
|
-
* Node environment - affects logging and behavior.
|
|
209
|
-
* Defaults to 'development' if not specified.
|
|
210
|
-
*/
|
|
211
|
-
NODE_ENV: z2.enum(["development", "production", "test"]).default("development"),
|
|
212
|
-
/**
|
|
213
|
-
* Log level for the scenario package.
|
|
214
|
-
* Defaults to 'info' if not specified.
|
|
215
|
-
*/
|
|
216
|
-
SCENARIO_LOG_LEVEL: z2.nativeEnum(LogLevel).optional(),
|
|
217
|
-
/**
|
|
218
|
-
* Scenario batch run ID.
|
|
219
|
-
* If not provided, a random ID will be generated.
|
|
220
|
-
*/
|
|
221
|
-
SCENARIO_BATCH_RUN_ID: z2.string().optional()
|
|
222
|
-
});
|
|
223
|
-
var env = envSchema.parse(process.env);
|
|
224
|
-
|
|
225
|
-
// src/config/index.ts
|
|
112
|
+
// src/config/get-project-config.ts
|
|
226
113
|
var logger = new Logger("scenario.config");
|
|
227
114
|
var configLoaded = false;
|
|
228
115
|
var config = null;
|
|
@@ -252,7 +139,10 @@ async function getProjectConfig() {
|
|
|
252
139
|
}
|
|
253
140
|
|
|
254
141
|
// src/utils/ids.ts
|
|
142
|
+
import crypto from "node:crypto";
|
|
143
|
+
import process2 from "node:process";
|
|
255
144
|
import { generate, parse } from "xksuid";
|
|
145
|
+
var batchRunId;
|
|
256
146
|
function generateThreadId() {
|
|
257
147
|
return `thread_${generate()}`;
|
|
258
148
|
}
|
|
@@ -263,10 +153,31 @@ function generateScenarioId() {
|
|
|
263
153
|
return `scenario_${generate()}`;
|
|
264
154
|
}
|
|
265
155
|
function getBatchRunId() {
|
|
266
|
-
if (
|
|
267
|
-
|
|
268
|
-
}
|
|
269
|
-
|
|
156
|
+
if (batchRunId) {
|
|
157
|
+
return batchRunId;
|
|
158
|
+
}
|
|
159
|
+
if (process2.env.SCENARIO_BATCH_RUN_ID) {
|
|
160
|
+
console.log("process.env.SCENARIO_BATCH_RUN_ID", process2.env.SCENARIO_BATCH_RUN_ID);
|
|
161
|
+
return batchRunId = process2.env.SCENARIO_BATCH_RUN_ID;
|
|
162
|
+
}
|
|
163
|
+
if (process2.env.VITEST_WORKER_ID || process2.env.JEST_WORKER_ID) {
|
|
164
|
+
const parentProcessId = process2.ppid;
|
|
165
|
+
const now = /* @__PURE__ */ new Date();
|
|
166
|
+
const year = now.getUTCFullYear();
|
|
167
|
+
const week = String(getISOWeekNumber(now)).padStart(2, "0");
|
|
168
|
+
const raw = `${parentProcessId}_${year}_w${week}`;
|
|
169
|
+
const hash = crypto.createHash("sha256").update(raw).digest("hex").slice(0, 12);
|
|
170
|
+
return batchRunId = `scenariobatchrun_${hash}`;
|
|
171
|
+
}
|
|
172
|
+
return batchRunId = `scenariobatchrun_${generate()}`;
|
|
173
|
+
}
|
|
174
|
+
function getISOWeekNumber(date) {
|
|
175
|
+
const tmp = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
|
176
|
+
const dayNum = tmp.getUTCDay() || 7;
|
|
177
|
+
tmp.setUTCDate(tmp.getUTCDate() + 4 - dayNum);
|
|
178
|
+
const yearStart = new Date(Date.UTC(tmp.getUTCFullYear(), 0, 1));
|
|
179
|
+
const weekNo = Math.ceil(((tmp.getTime() - yearStart.getTime()) / 864e5 + 1) / 7);
|
|
180
|
+
return weekNo;
|
|
270
181
|
}
|
|
271
182
|
function generateMessageId() {
|
|
272
183
|
return `scenariomsg_${generate()}`;
|
|
@@ -283,12 +194,11 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
283
194
|
if (this.isGreetingDisabled()) {
|
|
284
195
|
return;
|
|
285
196
|
}
|
|
286
|
-
|
|
287
|
-
if (_EventAlertMessageLogger.shownBatchIds.has(batchRunId)) {
|
|
197
|
+
if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
|
|
288
198
|
return;
|
|
289
199
|
}
|
|
290
|
-
_EventAlertMessageLogger.shownBatchIds.add(
|
|
291
|
-
this.displayGreeting(
|
|
200
|
+
_EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
|
|
201
|
+
this.displayGreeting();
|
|
292
202
|
}
|
|
293
203
|
/**
|
|
294
204
|
* Shows a fancy message about how to watch the simulation.
|
|
@@ -303,7 +213,7 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
303
213
|
isGreetingDisabled() {
|
|
304
214
|
return env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
|
|
305
215
|
}
|
|
306
|
-
displayGreeting(
|
|
216
|
+
displayGreeting() {
|
|
307
217
|
const separator = "\u2500".repeat(60);
|
|
308
218
|
if (!env.LANGWATCH_API_KEY) {
|
|
309
219
|
console.log(`
|
|
@@ -317,7 +227,10 @@ ${separator}`);
|
|
|
317
227
|
console.log(" \u2022 Set LANGWATCH_API_KEY environment variable");
|
|
318
228
|
console.log(" \u2022 Or configure apiKey in scenario.config.js");
|
|
319
229
|
console.log("");
|
|
320
|
-
console.log(`\u{1F4E6} Batch Run ID: ${
|
|
230
|
+
console.log(`\u{1F4E6} Batch Run ID: ${getBatchRunId()}`);
|
|
231
|
+
console.log("");
|
|
232
|
+
console.log("\u{1F507} To disable these messages:");
|
|
233
|
+
console.log(" \u2022 Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true");
|
|
321
234
|
console.log(`${separator}
|
|
322
235
|
`);
|
|
323
236
|
} else {
|
|
@@ -331,7 +244,10 @@ ${separator}`);
|
|
|
331
244
|
` API Key: ${env.LANGWATCH_API_KEY.length > 0 ? "Configured" : "Not configured"}`
|
|
332
245
|
);
|
|
333
246
|
console.log("");
|
|
334
|
-
console.log(`\u{1F4E6} Batch Run ID: ${
|
|
247
|
+
console.log(`\u{1F4E6} Batch Run ID: ${getBatchRunId()}`);
|
|
248
|
+
console.log("");
|
|
249
|
+
console.log("\u{1F507} To disable these messages:");
|
|
250
|
+
console.log(" \u2022 Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true");
|
|
335
251
|
console.log(`${separator}
|
|
336
252
|
`);
|
|
337
253
|
}
|
|
@@ -355,7 +271,7 @@ ${separator}`);
|
|
|
355
271
|
|
|
356
272
|
// src/events/schema.ts
|
|
357
273
|
import { EventType, MessagesSnapshotEventSchema } from "@ag-ui/core";
|
|
358
|
-
import { z as
|
|
274
|
+
import { z as z2 } from "zod";
|
|
359
275
|
var Verdict = /* @__PURE__ */ ((Verdict2) => {
|
|
360
276
|
Verdict2["SUCCESS"] = "success";
|
|
361
277
|
Verdict2["FAILURE"] = "failure";
|
|
@@ -371,59 +287,59 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
|
|
|
371
287
|
ScenarioRunStatus2["FAILED"] = "FAILED";
|
|
372
288
|
return ScenarioRunStatus2;
|
|
373
289
|
})(ScenarioRunStatus || {});
|
|
374
|
-
var baseEventSchema =
|
|
375
|
-
type:
|
|
376
|
-
timestamp:
|
|
377
|
-
rawEvent:
|
|
290
|
+
var baseEventSchema = z2.object({
|
|
291
|
+
type: z2.nativeEnum(EventType),
|
|
292
|
+
timestamp: z2.number(),
|
|
293
|
+
rawEvent: z2.any().optional()
|
|
378
294
|
});
|
|
379
|
-
var batchRunIdSchema =
|
|
380
|
-
var scenarioRunIdSchema =
|
|
381
|
-
var scenarioIdSchema =
|
|
295
|
+
var batchRunIdSchema = z2.string();
|
|
296
|
+
var scenarioRunIdSchema = z2.string();
|
|
297
|
+
var scenarioIdSchema = z2.string();
|
|
382
298
|
var baseScenarioEventSchema = baseEventSchema.extend({
|
|
383
299
|
batchRunId: batchRunIdSchema,
|
|
384
300
|
scenarioId: scenarioIdSchema,
|
|
385
301
|
scenarioRunId: scenarioRunIdSchema,
|
|
386
|
-
scenarioSetId:
|
|
302
|
+
scenarioSetId: z2.string().optional().default("default")
|
|
387
303
|
});
|
|
388
304
|
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
|
|
389
|
-
type:
|
|
390
|
-
metadata:
|
|
391
|
-
name:
|
|
392
|
-
description:
|
|
305
|
+
type: z2.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
|
|
306
|
+
metadata: z2.object({
|
|
307
|
+
name: z2.string().optional(),
|
|
308
|
+
description: z2.string().optional()
|
|
393
309
|
})
|
|
394
310
|
});
|
|
395
|
-
var scenarioResultsSchema =
|
|
396
|
-
verdict:
|
|
397
|
-
reasoning:
|
|
398
|
-
metCriteria:
|
|
399
|
-
unmetCriteria:
|
|
400
|
-
error:
|
|
311
|
+
var scenarioResultsSchema = z2.object({
|
|
312
|
+
verdict: z2.nativeEnum(Verdict),
|
|
313
|
+
reasoning: z2.string().optional(),
|
|
314
|
+
metCriteria: z2.array(z2.string()),
|
|
315
|
+
unmetCriteria: z2.array(z2.string()),
|
|
316
|
+
error: z2.string().optional()
|
|
401
317
|
});
|
|
402
318
|
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
|
|
403
|
-
type:
|
|
404
|
-
status:
|
|
319
|
+
type: z2.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
|
|
320
|
+
status: z2.nativeEnum(ScenarioRunStatus),
|
|
405
321
|
results: scenarioResultsSchema.optional().nullable()
|
|
406
322
|
});
|
|
407
323
|
var scenarioMessageSnapshotSchema = MessagesSnapshotEventSchema.merge(
|
|
408
324
|
baseScenarioEventSchema.extend({
|
|
409
|
-
type:
|
|
325
|
+
type: z2.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
|
|
410
326
|
})
|
|
411
327
|
);
|
|
412
|
-
var scenarioEventSchema =
|
|
328
|
+
var scenarioEventSchema = z2.discriminatedUnion("type", [
|
|
413
329
|
scenarioRunStartedSchema,
|
|
414
330
|
scenarioRunFinishedSchema,
|
|
415
331
|
scenarioMessageSnapshotSchema
|
|
416
332
|
]);
|
|
417
|
-
var successSchema =
|
|
418
|
-
var errorSchema =
|
|
419
|
-
var stateSchema =
|
|
420
|
-
state:
|
|
421
|
-
messages:
|
|
422
|
-
status:
|
|
333
|
+
var successSchema = z2.object({ success: z2.boolean() });
|
|
334
|
+
var errorSchema = z2.object({ error: z2.string() });
|
|
335
|
+
var stateSchema = z2.object({
|
|
336
|
+
state: z2.object({
|
|
337
|
+
messages: z2.array(z2.any()),
|
|
338
|
+
status: z2.string()
|
|
423
339
|
})
|
|
424
340
|
});
|
|
425
|
-
var runsSchema =
|
|
426
|
-
var eventsSchema =
|
|
341
|
+
var runsSchema = z2.object({ runs: z2.array(z2.string()) });
|
|
342
|
+
var eventsSchema = z2.object({ events: z2.array(scenarioEventSchema) });
|
|
427
343
|
|
|
428
344
|
// src/events/event-reporter.ts
|
|
429
345
|
var EventReporter = class {
|
|
@@ -618,8 +534,6 @@ export {
|
|
|
618
534
|
DEFAULT_MAX_TURNS,
|
|
619
535
|
DEFAULT_VERBOSE,
|
|
620
536
|
domain_exports,
|
|
621
|
-
Logger,
|
|
622
|
-
env,
|
|
623
537
|
getProjectConfig,
|
|
624
538
|
generateThreadId,
|
|
625
539
|
generateScenarioRunId,
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Logger
|
|
3
|
+
} from "./chunk-YPJZSK4J.mjs";
|
|
4
|
+
|
|
5
|
+
// src/integrations/vitest/reporter.ts
|
|
6
|
+
import fs from "fs";
|
|
7
|
+
import path from "path";
|
|
8
|
+
import chalk from "chalk";
|
|
9
|
+
var logger = Logger.create("integrations:vitest:reporter");
|
|
10
|
+
function getProjectRoot() {
|
|
11
|
+
return process.cwd();
|
|
12
|
+
}
|
|
13
|
+
var projectRoot = getProjectRoot();
|
|
14
|
+
var logDir = path.join(projectRoot, ".scenario");
|
|
15
|
+
if (!fs.existsSync(logDir)) fs.mkdirSync(logDir);
|
|
16
|
+
function getLogFilePath(testId) {
|
|
17
|
+
return path.join(logDir, `${testId}.log`);
|
|
18
|
+
}
|
|
19
|
+
function getFullTestName(task) {
|
|
20
|
+
let name = task.name;
|
|
21
|
+
let parent = task.suite;
|
|
22
|
+
while (parent) {
|
|
23
|
+
name = `${parent.name} > ${name}`;
|
|
24
|
+
parent = parent.suite;
|
|
25
|
+
}
|
|
26
|
+
return name;
|
|
27
|
+
}
|
|
28
|
+
var VitestReporter = class {
|
|
29
|
+
results = [];
|
|
30
|
+
async onTestCaseResult(test) {
|
|
31
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
32
|
+
const fullName = getFullTestName(test);
|
|
33
|
+
const filePath = getLogFilePath(test.id);
|
|
34
|
+
if (!fs.existsSync(filePath)) {
|
|
35
|
+
logger.warn(
|
|
36
|
+
`No log file found ${filePath} for test ${fullName}`,
|
|
37
|
+
test.id
|
|
38
|
+
);
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
|
|
42
|
+
const events = lines.map((line) => JSON.parse(line));
|
|
43
|
+
const runs = /* @__PURE__ */ new Map();
|
|
44
|
+
for (const event of events) {
|
|
45
|
+
const runId = event.scenarioRunId ?? "unknown";
|
|
46
|
+
if (!runs.has(runId)) runs.set(runId, []);
|
|
47
|
+
runs.get(runId).push(event);
|
|
48
|
+
}
|
|
49
|
+
for (const [runId, runEvents] of Array.from(runs.entries())) {
|
|
50
|
+
const started = runEvents.find(
|
|
51
|
+
(e) => e.type === "SCENARIO_RUN_STARTED"
|
|
52
|
+
);
|
|
53
|
+
const finished = runEvents.find(
|
|
54
|
+
(e) => e.type === "SCENARIO_RUN_FINISHED"
|
|
55
|
+
);
|
|
56
|
+
const messages = runEvents.filter(
|
|
57
|
+
(e) => e.type === "SCENARIO_MESSAGE_SNAPSHOT"
|
|
58
|
+
);
|
|
59
|
+
this.results.push({
|
|
60
|
+
name: ((_a = started == null ? void 0 : started.metadata) == null ? void 0 : _a.name) ?? fullName,
|
|
61
|
+
status: (finished == null ? void 0 : finished.status) ?? "UNKNOWN",
|
|
62
|
+
duration: started && finished ? finished.timestamp - started.timestamp : 0,
|
|
63
|
+
reasoning: (_b = finished == null ? void 0 : finished.results) == null ? void 0 : _b.reasoning,
|
|
64
|
+
criteria: (finished == null ? void 0 : finished.results) ? `Success Criteria: ${((_c = finished.results.metCriteria) == null ? void 0 : _c.length) ?? 0}/${(((_d = finished.results.metCriteria) == null ? void 0 : _d.length) ?? 0) + (((_e = finished.results.unmetCriteria) == null ? void 0 : _e.length) ?? 0)}` : void 0
|
|
65
|
+
});
|
|
66
|
+
console.log(
|
|
67
|
+
`
|
|
68
|
+
--- Scenario Run: ${((_f = started == null ? void 0 : started.metadata) == null ? void 0 : _f.name) ?? runId} ---`
|
|
69
|
+
);
|
|
70
|
+
if (started) {
|
|
71
|
+
console.log(`Description: ${((_g = started.metadata) == null ? void 0 : _g.description) ?? ""}`);
|
|
72
|
+
}
|
|
73
|
+
if (messages.length) {
|
|
74
|
+
console.log("Chat log:");
|
|
75
|
+
let lastMessageCount = 0;
|
|
76
|
+
for (const msg of messages) {
|
|
77
|
+
const allMessages = msg.messages ?? [];
|
|
78
|
+
for (const m of allMessages.slice(lastMessageCount)) {
|
|
79
|
+
const role = m.role;
|
|
80
|
+
let roleLabel = role;
|
|
81
|
+
if (role.toLowerCase() === "user") roleLabel = chalk.green("User");
|
|
82
|
+
else if (role.toLowerCase() === "agent")
|
|
83
|
+
roleLabel = chalk.cyan("Agent");
|
|
84
|
+
else if (role.toLowerCase() === "assistant")
|
|
85
|
+
roleLabel = chalk.cyan("Assistant");
|
|
86
|
+
else roleLabel = chalk.yellow(role);
|
|
87
|
+
console.log(`${roleLabel}: ${m.content}`);
|
|
88
|
+
}
|
|
89
|
+
lastMessageCount = allMessages.length;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
if (finished) {
|
|
93
|
+
console.log("--- Verdict ---");
|
|
94
|
+
console.log(`Status: ${finished.status}`);
|
|
95
|
+
if (finished.results) {
|
|
96
|
+
console.log(`Verdict: ${finished.results.verdict}`);
|
|
97
|
+
if (finished.results.reasoning)
|
|
98
|
+
console.log(`Reasoning: ${finished.results.reasoning}`);
|
|
99
|
+
if ((_h = finished.results.metCriteria) == null ? void 0 : _h.length)
|
|
100
|
+
console.log(
|
|
101
|
+
`Met criteria: ${finished.results.metCriteria.join(", ")}`
|
|
102
|
+
);
|
|
103
|
+
if ((_i = finished.results.unmetCriteria) == null ? void 0 : _i.length)
|
|
104
|
+
console.log(
|
|
105
|
+
`Unmet criteria: ${finished.results.unmetCriteria.join(", ")}`
|
|
106
|
+
);
|
|
107
|
+
if (finished.results.error)
|
|
108
|
+
console.log(`Error: ${finished.results.error}`);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
console.log("-----------------------------\n");
|
|
112
|
+
}
|
|
113
|
+
fs.unlinkSync(filePath);
|
|
114
|
+
}
|
|
115
|
+
async onTestRunEnd() {
|
|
116
|
+
if (this.results.length === 0) return;
|
|
117
|
+
const total = this.results.length;
|
|
118
|
+
const passed = this.results.filter((r) => r.status === "SUCCESS").length;
|
|
119
|
+
const failed = this.results.filter((r) => r.status !== "SUCCESS").length;
|
|
120
|
+
const successRate = (passed / total * 100).toFixed(1);
|
|
121
|
+
console.log();
|
|
122
|
+
console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
|
|
123
|
+
console.log(`Total Scenarios: ${total}`);
|
|
124
|
+
console.log(chalk.green(`Passed: ${passed}`));
|
|
125
|
+
console.log(chalk.red(`Failed: ${failed}`));
|
|
126
|
+
console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
|
|
127
|
+
this.results.forEach((r, i) => {
|
|
128
|
+
const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
|
|
129
|
+
console.log();
|
|
130
|
+
console.log(
|
|
131
|
+
`${i + 1}. ${r.name} - ${statusColor(r.status)} in ${(r.duration / 1e3).toFixed(2)}s`
|
|
132
|
+
);
|
|
133
|
+
if (r.reasoning) {
|
|
134
|
+
console.log(chalk.greenBright(" Reasoning: ") + r.reasoning);
|
|
135
|
+
}
|
|
136
|
+
if (r.criteria) {
|
|
137
|
+
console.log(chalk.bold(" " + r.criteria));
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
console.log();
|
|
141
|
+
}
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
export {
|
|
145
|
+
VitestReporter
|
|
146
|
+
};
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
// src/config/env.ts
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
// src/config/log-levels.ts
|
|
5
|
+
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
6
|
+
LogLevel2["ERROR"] = "ERROR";
|
|
7
|
+
LogLevel2["WARN"] = "WARN";
|
|
8
|
+
LogLevel2["INFO"] = "INFO";
|
|
9
|
+
LogLevel2["DEBUG"] = "DEBUG";
|
|
10
|
+
return LogLevel2;
|
|
11
|
+
})(LogLevel || {});
|
|
12
|
+
|
|
13
|
+
// src/config/env.ts
|
|
14
|
+
var envSchema = z.object({
|
|
15
|
+
/**
|
|
16
|
+
* LangWatch API key for event reporting.
|
|
17
|
+
* If not provided, events will not be sent to LangWatch.
|
|
18
|
+
*/
|
|
19
|
+
LANGWATCH_API_KEY: z.string().optional(),
|
|
20
|
+
/**
|
|
21
|
+
* LangWatch endpoint URL for event reporting.
|
|
22
|
+
* Defaults to the production LangWatch endpoint.
|
|
23
|
+
*/
|
|
24
|
+
LANGWATCH_ENDPOINT: z.string().url().default("https://app.langwatch.ai"),
|
|
25
|
+
/**
|
|
26
|
+
* Disables simulation report info messages when set to any truthy value.
|
|
27
|
+
* Useful for CI/CD environments or when you want cleaner output.
|
|
28
|
+
*/
|
|
29
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z.string().optional().transform((val) => Boolean(val)),
|
|
30
|
+
/**
|
|
31
|
+
* Node environment - affects logging and behavior.
|
|
32
|
+
* Defaults to 'development' if not specified.
|
|
33
|
+
*/
|
|
34
|
+
NODE_ENV: z.enum(["development", "production", "test"]).default("development"),
|
|
35
|
+
/**
|
|
36
|
+
* Log level for the scenario package.
|
|
37
|
+
* Defaults to 'info' if not specified.
|
|
38
|
+
*/
|
|
39
|
+
LOG_LEVEL: z.nativeEnum(LogLevel).optional(),
|
|
40
|
+
/**
|
|
41
|
+
* Scenario batch run ID.
|
|
42
|
+
* If not provided, a random ID will be generated.
|
|
43
|
+
*/
|
|
44
|
+
SCENARIO_BATCH_RUN_ID: z.string().optional()
|
|
45
|
+
});
|
|
46
|
+
var env = envSchema.parse(process.env);
|
|
47
|
+
|
|
48
|
+
// src/utils/logger.ts
|
|
49
|
+
var Logger = class _Logger {
|
|
50
|
+
constructor(context) {
|
|
51
|
+
this.context = context;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Creates a logger with context (e.g., class name)
|
|
55
|
+
*/
|
|
56
|
+
static create(context) {
|
|
57
|
+
return new _Logger(context);
|
|
58
|
+
}
|
|
59
|
+
getLogLevel() {
|
|
60
|
+
return env.LOG_LEVEL ?? "INFO" /* INFO */;
|
|
61
|
+
}
|
|
62
|
+
getLogLevelIndex(level) {
|
|
63
|
+
return Object.values(LogLevel).indexOf(level);
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Checks if logging should occur based on LOG_LEVEL env var
|
|
67
|
+
*/
|
|
68
|
+
shouldLog(level) {
|
|
69
|
+
const currentLevelIndex = this.getLogLevelIndex(this.getLogLevel());
|
|
70
|
+
const requestedLevelIndex = this.getLogLevelIndex(level);
|
|
71
|
+
return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
|
|
72
|
+
}
|
|
73
|
+
formatMessage(message) {
|
|
74
|
+
return this.context ? `[${this.context}] ${message}` : message;
|
|
75
|
+
}
|
|
76
|
+
error(message, data) {
|
|
77
|
+
if (this.shouldLog("ERROR" /* ERROR */)) {
|
|
78
|
+
const formattedMessage = this.formatMessage(message);
|
|
79
|
+
if (data) {
|
|
80
|
+
console.error(formattedMessage, data);
|
|
81
|
+
} else {
|
|
82
|
+
console.error(formattedMessage);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
warn(message, data) {
|
|
87
|
+
if (this.shouldLog("WARN" /* WARN */)) {
|
|
88
|
+
const formattedMessage = this.formatMessage(message);
|
|
89
|
+
if (data) {
|
|
90
|
+
console.warn(formattedMessage, data);
|
|
91
|
+
} else {
|
|
92
|
+
console.warn(formattedMessage);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
info(message, data) {
|
|
97
|
+
if (this.shouldLog("INFO" /* INFO */)) {
|
|
98
|
+
const formattedMessage = this.formatMessage(message);
|
|
99
|
+
if (data) {
|
|
100
|
+
console.info(formattedMessage, data);
|
|
101
|
+
} else {
|
|
102
|
+
console.info(formattedMessage);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
debug(message, data) {
|
|
107
|
+
if (this.shouldLog("DEBUG" /* DEBUG */)) {
|
|
108
|
+
const formattedMessage = this.formatMessage(message);
|
|
109
|
+
if (data) {
|
|
110
|
+
console.log(formattedMessage, data);
|
|
111
|
+
} else {
|
|
112
|
+
console.log(formattedMessage);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
export {
|
|
119
|
+
env,
|
|
120
|
+
Logger
|
|
121
|
+
};
|
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
|
-
import { CoreMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
2
|
+
import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
import { Observable } from 'rxjs';
|
|
5
5
|
|
|
@@ -331,7 +331,12 @@ interface ScenarioExecutionStateLike {
|
|
|
331
331
|
* Retrieves the last user message from the execution state.
|
|
332
332
|
* @returns The last user message.
|
|
333
333
|
*/
|
|
334
|
-
lastUserMessage():
|
|
334
|
+
lastUserMessage(): CoreUserMessage;
|
|
335
|
+
/**
|
|
336
|
+
* Retrieves the last agent message from the execution state.
|
|
337
|
+
* @returns The last agent message.
|
|
338
|
+
*/
|
|
339
|
+
lastAgentMessage(): CoreAssistantMessage;
|
|
335
340
|
/**
|
|
336
341
|
* Retrieves the last tool call message for a specific tool.
|
|
337
342
|
* @param toolName - The name of the tool.
|
|
@@ -1203,7 +1208,8 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1203
1208
|
*/
|
|
1204
1209
|
addMessage(message: CoreMessage): void;
|
|
1205
1210
|
lastMessage(): CoreMessage;
|
|
1206
|
-
lastUserMessage():
|
|
1211
|
+
lastUserMessage(): CoreUserMessage;
|
|
1212
|
+
lastAgentMessage(): CoreAssistantMessage;
|
|
1207
1213
|
lastToolCall(toolName: string): CoreToolMessage;
|
|
1208
1214
|
hasToolCall(toolName: string): boolean;
|
|
1209
1215
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
|
-
import { CoreMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
2
|
+
import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel } from 'ai';
|
|
3
3
|
import { z } from 'zod';
|
|
4
4
|
import { Observable } from 'rxjs';
|
|
5
5
|
|
|
@@ -331,7 +331,12 @@ interface ScenarioExecutionStateLike {
|
|
|
331
331
|
* Retrieves the last user message from the execution state.
|
|
332
332
|
* @returns The last user message.
|
|
333
333
|
*/
|
|
334
|
-
lastUserMessage():
|
|
334
|
+
lastUserMessage(): CoreUserMessage;
|
|
335
|
+
/**
|
|
336
|
+
* Retrieves the last agent message from the execution state.
|
|
337
|
+
* @returns The last agent message.
|
|
338
|
+
*/
|
|
339
|
+
lastAgentMessage(): CoreAssistantMessage;
|
|
335
340
|
/**
|
|
336
341
|
* Retrieves the last tool call message for a specific tool.
|
|
337
342
|
* @param toolName - The name of the tool.
|
|
@@ -1203,7 +1208,8 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1203
1208
|
*/
|
|
1204
1209
|
addMessage(message: CoreMessage): void;
|
|
1205
1210
|
lastMessage(): CoreMessage;
|
|
1206
|
-
lastUserMessage():
|
|
1211
|
+
lastUserMessage(): CoreUserMessage;
|
|
1212
|
+
lastAgentMessage(): CoreAssistantMessage;
|
|
1207
1213
|
lastToolCall(toolName: string): CoreToolMessage;
|
|
1208
1214
|
hasToolCall(toolName: string): boolean;
|
|
1209
1215
|
}
|