@langwatch/scenario 0.2.2 → 0.2.9
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +3 -9
- package/dist/{chunk-NUZZAQV2.mjs → chunk-7H6OGEQ5.mjs} +85 -163
- package/dist/chunk-K7KLHTDI.mjs +146 -0
- package/dist/chunk-YPJZSK4J.mjs +121 -0
- package/dist/index.d.mts +86 -72
- package/dist/index.d.ts +86 -72
- package/dist/index.js +131 -82
- package/dist/index.mjs +40 -24
- package/dist/integrations/vitest/config.d.mts +5 -0
- package/dist/integrations/vitest/config.d.ts +5 -0
- package/dist/integrations/vitest/config.js +324 -0
- package/dist/integrations/vitest/config.mjs +35 -0
- package/dist/integrations/vitest/reporter.js +124 -1
- package/dist/integrations/vitest/reporter.mjs +4 -135
- package/dist/integrations/vitest/setup-global.d.mts +3 -0
- package/dist/integrations/vitest/setup-global.d.ts +3 -0
- package/dist/integrations/vitest/setup-global.js +30 -0
- package/dist/integrations/vitest/setup-global.mjs +11 -0
- package/dist/integrations/vitest/setup.js +97 -67
- package/dist/integrations/vitest/setup.mjs +7 -3
- package/package.json +13 -4
package/README.md
CHANGED
|
@@ -97,7 +97,7 @@ describe("Weather Agent", () => {
|
|
|
97
97
|
role: AgentRole.AGENT,
|
|
98
98
|
call: async (input) => {
|
|
99
99
|
const response = await generateText({
|
|
100
|
-
model: openai("gpt-4.1
|
|
100
|
+
model: openai("gpt-4.1"),
|
|
101
101
|
system: `You are a helpful assistant that may help the user with weather information.`,
|
|
102
102
|
messages: input.messages,
|
|
103
103
|
tools: { get_current_weather: getCurrentWeather },
|
|
@@ -122,7 +122,7 @@ describe("Weather Agent", () => {
|
|
|
122
122
|
description: "The user asks for the weather in a specific city, and the agent should use the weather tool to find it.",
|
|
123
123
|
agents: [
|
|
124
124
|
weatherAgent,
|
|
125
|
-
scenario.userSimulatorAgent({ model: openai("gpt-4.1
|
|
125
|
+
scenario.userSimulatorAgent({ model: openai("gpt-4.1") }),
|
|
126
126
|
],
|
|
127
127
|
script: [
|
|
128
128
|
scenario.user("What's the weather like in Barcelona?"),
|
|
@@ -222,10 +222,6 @@ export default defineConfig({
|
|
|
222
222
|
model: openai("gpt-4o-mini"),
|
|
223
223
|
temperature: 0.1,
|
|
224
224
|
},
|
|
225
|
-
|
|
226
|
-
// Configure the LangWatch reporting endpoint and API key
|
|
227
|
-
langwatchEndpoint: "https://app.langwatch.ai",
|
|
228
|
-
langwatchApiKey: process.env.LANGWATCH_API_KEY,
|
|
229
225
|
});
|
|
230
226
|
```
|
|
231
227
|
|
|
@@ -239,14 +235,12 @@ The following configuration options are all optional. You can specify any combin
|
|
|
239
235
|
- `model`: **(Required if `defaultModel` is set)** An instance of a language model from a provider like `@ai-sdk/openai`.
|
|
240
236
|
- `temperature` _(Optional)_: The default temperature for the model (e.g., `0.1`).
|
|
241
237
|
- `maxTokens` _(Optional)_: The default maximum number of tokens for the model to generate.
|
|
242
|
-
- `langwatchEndpoint` _(Optional)_: The endpoint for the LangWatch reporting service. If not specified, it defaults to the `LANGWATCH_ENDPOINT` environment variable, or `https://app.langwatch.ai`.
|
|
243
|
-
- `langwatchApiKey` _(Optional)_: Your LangWatch API key for authenticating with the reporting service. If not specified, it defaults to the `LANGWATCH_API_KEY` environment variable.
|
|
244
238
|
|
|
245
239
|
### Environment Variables
|
|
246
240
|
|
|
247
241
|
You can control the library's behavior with the following environment variables:
|
|
248
242
|
|
|
249
|
-
- `
|
|
243
|
+
- `LOG_LEVEL`: Sets the verbosity of the internal logger. Can be `ERROR`, `WARN`, `INFO`, or `DEBUG` (uppercase). Defaults to `INFO` when unset, so only `DEBUG` messages are suppressed by default.
|
|
250
244
|
- `SCENARIO_DISABLE_SIMULATION_REPORT_INFO`: Set to `true` to disable the "Scenario Simulation Reporting" banner that is printed to the console when a test run starts.
|
|
251
245
|
- `LANGWATCH_API_KEY`: Your LangWatch API key. This is used as a fallback if `langwatchApiKey` is not set in your config file.
|
|
252
246
|
- `LANGWATCH_ENDPOINT`: The LangWatch reporting endpoint. This is used as a fallback if `langwatchEndpoint` is not set in your config file.
|
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Logger,
|
|
3
|
+
env
|
|
4
|
+
} from "./chunk-YPJZSK4J.mjs";
|
|
1
5
|
import {
|
|
2
6
|
__export
|
|
3
7
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -43,12 +47,18 @@ var JudgeAgentAdapter = class {
|
|
|
43
47
|
}
|
|
44
48
|
};
|
|
45
49
|
|
|
50
|
+
// src/domain/scenarios/index.ts
|
|
51
|
+
var DEFAULT_MAX_TURNS = 10;
|
|
52
|
+
var DEFAULT_VERBOSE = false;
|
|
53
|
+
|
|
46
54
|
// src/domain/index.ts
|
|
47
55
|
var domain_exports = {};
|
|
48
56
|
__export(domain_exports, {
|
|
49
57
|
AgentAdapter: () => AgentAdapter,
|
|
50
58
|
AgentRole: () => AgentRole,
|
|
59
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
51
60
|
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
61
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
52
62
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
53
63
|
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
54
64
|
allAgentRoles: () => allAgentRoles,
|
|
@@ -99,124 +109,7 @@ async function loadScenarioProjectConfig() {
|
|
|
99
109
|
return await scenarioProjectConfigSchema.parseAsync({});
|
|
100
110
|
}
|
|
101
111
|
|
|
102
|
-
// src/
|
|
103
|
-
var Logger = class _Logger {
|
|
104
|
-
constructor(context) {
|
|
105
|
-
this.context = context;
|
|
106
|
-
}
|
|
107
|
-
/**
|
|
108
|
-
* Creates a logger with context (e.g., class name)
|
|
109
|
-
*/
|
|
110
|
-
static create(context) {
|
|
111
|
-
return new _Logger(context);
|
|
112
|
-
}
|
|
113
|
-
getLogLevel() {
|
|
114
|
-
return env.SCENARIO_LOG_LEVEL ?? "INFO" /* INFO */;
|
|
115
|
-
}
|
|
116
|
-
getLogLevelIndex(level) {
|
|
117
|
-
return Object.values(LogLevel).indexOf(level);
|
|
118
|
-
}
|
|
119
|
-
/**
|
|
120
|
-
* Checks if logging should occur based on LOG_LEVEL env var
|
|
121
|
-
*/
|
|
122
|
-
shouldLog(level) {
|
|
123
|
-
const currentLevelIndex = this.getLogLevelIndex(this.getLogLevel());
|
|
124
|
-
const requestedLevelIndex = this.getLogLevelIndex(level);
|
|
125
|
-
return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
|
|
126
|
-
}
|
|
127
|
-
formatMessage(message) {
|
|
128
|
-
return this.context ? `[${this.context}] ${message}` : message;
|
|
129
|
-
}
|
|
130
|
-
error(message, data) {
|
|
131
|
-
if (this.shouldLog("ERROR" /* ERROR */)) {
|
|
132
|
-
const formattedMessage = this.formatMessage(message);
|
|
133
|
-
if (data) {
|
|
134
|
-
console.error(formattedMessage, data);
|
|
135
|
-
} else {
|
|
136
|
-
console.error(formattedMessage);
|
|
137
|
-
}
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
warn(message, data) {
|
|
141
|
-
if (this.shouldLog("WARN" /* WARN */)) {
|
|
142
|
-
const formattedMessage = this.formatMessage(message);
|
|
143
|
-
if (data) {
|
|
144
|
-
console.warn(formattedMessage, data);
|
|
145
|
-
} else {
|
|
146
|
-
console.warn(formattedMessage);
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
info(message, data) {
|
|
151
|
-
if (this.shouldLog("INFO" /* INFO */)) {
|
|
152
|
-
const formattedMessage = this.formatMessage(message);
|
|
153
|
-
if (data) {
|
|
154
|
-
console.info(formattedMessage, data);
|
|
155
|
-
} else {
|
|
156
|
-
console.info(formattedMessage);
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
|
-
debug(message, data) {
|
|
161
|
-
if (this.shouldLog("DEBUG" /* DEBUG */)) {
|
|
162
|
-
const formattedMessage = this.formatMessage(message);
|
|
163
|
-
if (data) {
|
|
164
|
-
console.log(formattedMessage, data);
|
|
165
|
-
} else {
|
|
166
|
-
console.log(formattedMessage);
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
};
|
|
171
|
-
|
|
172
|
-
// src/config/env.ts
|
|
173
|
-
import { z as z2 } from "zod";
|
|
174
|
-
|
|
175
|
-
// src/config/log-levels.ts
|
|
176
|
-
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
177
|
-
LogLevel2["ERROR"] = "ERROR";
|
|
178
|
-
LogLevel2["WARN"] = "WARN";
|
|
179
|
-
LogLevel2["INFO"] = "INFO";
|
|
180
|
-
LogLevel2["DEBUG"] = "DEBUG";
|
|
181
|
-
return LogLevel2;
|
|
182
|
-
})(LogLevel || {});
|
|
183
|
-
|
|
184
|
-
// src/config/env.ts
|
|
185
|
-
var envSchema = z2.object({
|
|
186
|
-
/**
|
|
187
|
-
* LangWatch API key for event reporting.
|
|
188
|
-
* If not provided, events will not be sent to LangWatch.
|
|
189
|
-
*/
|
|
190
|
-
LANGWATCH_API_KEY: z2.string().optional(),
|
|
191
|
-
/**
|
|
192
|
-
* LangWatch endpoint URL for event reporting.
|
|
193
|
-
* Defaults to the production LangWatch endpoint.
|
|
194
|
-
*/
|
|
195
|
-
LANGWATCH_ENDPOINT: z2.string().url().default("https://app.langwatch.ai"),
|
|
196
|
-
/**
|
|
197
|
-
* Disables simulation report info messages when set to any truthy value.
|
|
198
|
-
* Useful for CI/CD environments or when you want cleaner output.
|
|
199
|
-
*/
|
|
200
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z2.string().optional().transform((val) => Boolean(val)),
|
|
201
|
-
/**
|
|
202
|
-
* Node environment - affects logging and behavior.
|
|
203
|
-
* Defaults to 'development' if not specified.
|
|
204
|
-
*/
|
|
205
|
-
NODE_ENV: z2.enum(["development", "production", "test"]).default("development"),
|
|
206
|
-
/**
|
|
207
|
-
* Log level for the scenario package.
|
|
208
|
-
* Defaults to 'info' if not specified.
|
|
209
|
-
*/
|
|
210
|
-
SCENARIO_LOG_LEVEL: z2.nativeEnum(LogLevel).optional(),
|
|
211
|
-
/**
|
|
212
|
-
* Scenario batch run ID.
|
|
213
|
-
* If not provided, a random ID will be generated.
|
|
214
|
-
*/
|
|
215
|
-
SCENARIO_BATCH_RUN_ID: z2.string().optional()
|
|
216
|
-
});
|
|
217
|
-
var env = envSchema.parse(process.env);
|
|
218
|
-
|
|
219
|
-
// src/config/index.ts
|
|
112
|
+
// src/config/get-project-config.ts
|
|
220
113
|
var logger = new Logger("scenario.config");
|
|
221
114
|
var configLoaded = false;
|
|
222
115
|
var config = null;
|
|
@@ -231,7 +124,7 @@ async function loadProjectConfig() {
|
|
|
231
124
|
configLoadPromise = (async () => {
|
|
232
125
|
try {
|
|
233
126
|
config = await loadScenarioProjectConfig();
|
|
234
|
-
logger.
|
|
127
|
+
logger.debug("loaded scenario project config", { config });
|
|
235
128
|
} catch (error) {
|
|
236
129
|
logger.error("error loading scenario project config", { error });
|
|
237
130
|
} finally {
|
|
@@ -246,7 +139,10 @@ async function getProjectConfig() {
|
|
|
246
139
|
}
|
|
247
140
|
|
|
248
141
|
// src/utils/ids.ts
|
|
142
|
+
import crypto from "node:crypto";
|
|
143
|
+
import process2 from "node:process";
|
|
249
144
|
import { generate, parse } from "xksuid";
|
|
145
|
+
var batchRunId;
|
|
250
146
|
function generateThreadId() {
|
|
251
147
|
return `thread_${generate()}`;
|
|
252
148
|
}
|
|
@@ -257,10 +153,31 @@ function generateScenarioId() {
|
|
|
257
153
|
return `scenario_${generate()}`;
|
|
258
154
|
}
|
|
259
155
|
function getBatchRunId() {
|
|
260
|
-
if (
|
|
261
|
-
|
|
262
|
-
}
|
|
263
|
-
|
|
156
|
+
if (batchRunId) {
|
|
157
|
+
return batchRunId;
|
|
158
|
+
}
|
|
159
|
+
if (process2.env.SCENARIO_BATCH_RUN_ID) {
|
|
160
|
+
console.log("process.env.SCENARIO_BATCH_RUN_ID", process2.env.SCENARIO_BATCH_RUN_ID);
|
|
161
|
+
return batchRunId = process2.env.SCENARIO_BATCH_RUN_ID;
|
|
162
|
+
}
|
|
163
|
+
if (process2.env.VITEST_WORKER_ID || process2.env.JEST_WORKER_ID) {
|
|
164
|
+
const parentProcessId = process2.ppid;
|
|
165
|
+
const now = /* @__PURE__ */ new Date();
|
|
166
|
+
const year = now.getUTCFullYear();
|
|
167
|
+
const week = String(getISOWeekNumber(now)).padStart(2, "0");
|
|
168
|
+
const raw = `${parentProcessId}_${year}_w${week}`;
|
|
169
|
+
const hash = crypto.createHash("sha256").update(raw).digest("hex").slice(0, 12);
|
|
170
|
+
return batchRunId = `scenariobatchrun_${hash}`;
|
|
171
|
+
}
|
|
172
|
+
return batchRunId = `scenariobatchrun_${generate()}`;
|
|
173
|
+
}
|
|
174
|
+
function getISOWeekNumber(date) {
|
|
175
|
+
const tmp = new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate()));
|
|
176
|
+
const dayNum = tmp.getUTCDay() || 7;
|
|
177
|
+
tmp.setUTCDate(tmp.getUTCDate() + 4 - dayNum);
|
|
178
|
+
const yearStart = new Date(Date.UTC(tmp.getUTCFullYear(), 0, 1));
|
|
179
|
+
const weekNo = Math.ceil(((tmp.getTime() - yearStart.getTime()) / 864e5 + 1) / 7);
|
|
180
|
+
return weekNo;
|
|
264
181
|
}
|
|
265
182
|
function generateMessageId() {
|
|
266
183
|
return `scenariomsg_${generate()}`;
|
|
@@ -277,12 +194,11 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
277
194
|
if (this.isGreetingDisabled()) {
|
|
278
195
|
return;
|
|
279
196
|
}
|
|
280
|
-
|
|
281
|
-
if (_EventAlertMessageLogger.shownBatchIds.has(batchRunId)) {
|
|
197
|
+
if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
|
|
282
198
|
return;
|
|
283
199
|
}
|
|
284
|
-
_EventAlertMessageLogger.shownBatchIds.add(
|
|
285
|
-
this.displayGreeting(
|
|
200
|
+
_EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
|
|
201
|
+
this.displayGreeting();
|
|
286
202
|
}
|
|
287
203
|
/**
|
|
288
204
|
* Shows a fancy message about how to watch the simulation.
|
|
@@ -297,7 +213,7 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
297
213
|
isGreetingDisabled() {
|
|
298
214
|
return env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
|
|
299
215
|
}
|
|
300
|
-
displayGreeting(
|
|
216
|
+
displayGreeting() {
|
|
301
217
|
const separator = "\u2500".repeat(60);
|
|
302
218
|
if (!env.LANGWATCH_API_KEY) {
|
|
303
219
|
console.log(`
|
|
@@ -311,7 +227,10 @@ ${separator}`);
|
|
|
311
227
|
console.log(" \u2022 Set LANGWATCH_API_KEY environment variable");
|
|
312
228
|
console.log(" \u2022 Or configure apiKey in scenario.config.js");
|
|
313
229
|
console.log("");
|
|
314
|
-
console.log(`\u{1F4E6} Batch Run ID: ${
|
|
230
|
+
console.log(`\u{1F4E6} Batch Run ID: ${getBatchRunId()}`);
|
|
231
|
+
console.log("");
|
|
232
|
+
console.log("\u{1F507} To disable these messages:");
|
|
233
|
+
console.log(" \u2022 Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true");
|
|
315
234
|
console.log(`${separator}
|
|
316
235
|
`);
|
|
317
236
|
} else {
|
|
@@ -325,7 +244,10 @@ ${separator}`);
|
|
|
325
244
|
` API Key: ${env.LANGWATCH_API_KEY.length > 0 ? "Configured" : "Not configured"}`
|
|
326
245
|
);
|
|
327
246
|
console.log("");
|
|
328
|
-
console.log(`\u{1F4E6} Batch Run ID: ${
|
|
247
|
+
console.log(`\u{1F4E6} Batch Run ID: ${getBatchRunId()}`);
|
|
248
|
+
console.log("");
|
|
249
|
+
console.log("\u{1F507} To disable these messages:");
|
|
250
|
+
console.log(" \u2022 Set SCENARIO_DISABLE_SIMULATION_REPORT_INFO=true");
|
|
329
251
|
console.log(`${separator}
|
|
330
252
|
`);
|
|
331
253
|
}
|
|
@@ -349,7 +271,7 @@ ${separator}`);
|
|
|
349
271
|
|
|
350
272
|
// src/events/schema.ts
|
|
351
273
|
import { EventType, MessagesSnapshotEventSchema } from "@ag-ui/core";
|
|
352
|
-
import { z as
|
|
274
|
+
import { z as z2 } from "zod";
|
|
353
275
|
var Verdict = /* @__PURE__ */ ((Verdict2) => {
|
|
354
276
|
Verdict2["SUCCESS"] = "success";
|
|
355
277
|
Verdict2["FAILURE"] = "failure";
|
|
@@ -365,59 +287,59 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
|
|
|
365
287
|
ScenarioRunStatus2["FAILED"] = "FAILED";
|
|
366
288
|
return ScenarioRunStatus2;
|
|
367
289
|
})(ScenarioRunStatus || {});
|
|
368
|
-
var baseEventSchema =
|
|
369
|
-
type:
|
|
370
|
-
timestamp:
|
|
371
|
-
rawEvent:
|
|
290
|
+
var baseEventSchema = z2.object({
|
|
291
|
+
type: z2.nativeEnum(EventType),
|
|
292
|
+
timestamp: z2.number(),
|
|
293
|
+
rawEvent: z2.any().optional()
|
|
372
294
|
});
|
|
373
|
-
var batchRunIdSchema =
|
|
374
|
-
var scenarioRunIdSchema =
|
|
375
|
-
var scenarioIdSchema =
|
|
295
|
+
var batchRunIdSchema = z2.string();
|
|
296
|
+
var scenarioRunIdSchema = z2.string();
|
|
297
|
+
var scenarioIdSchema = z2.string();
|
|
376
298
|
var baseScenarioEventSchema = baseEventSchema.extend({
|
|
377
299
|
batchRunId: batchRunIdSchema,
|
|
378
300
|
scenarioId: scenarioIdSchema,
|
|
379
301
|
scenarioRunId: scenarioRunIdSchema,
|
|
380
|
-
scenarioSetId:
|
|
302
|
+
scenarioSetId: z2.string().optional().default("default")
|
|
381
303
|
});
|
|
382
304
|
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
|
|
383
|
-
type:
|
|
384
|
-
metadata:
|
|
385
|
-
name:
|
|
386
|
-
description:
|
|
305
|
+
type: z2.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
|
|
306
|
+
metadata: z2.object({
|
|
307
|
+
name: z2.string().optional(),
|
|
308
|
+
description: z2.string().optional()
|
|
387
309
|
})
|
|
388
310
|
});
|
|
389
|
-
var scenarioResultsSchema =
|
|
390
|
-
verdict:
|
|
391
|
-
reasoning:
|
|
392
|
-
metCriteria:
|
|
393
|
-
unmetCriteria:
|
|
394
|
-
error:
|
|
311
|
+
var scenarioResultsSchema = z2.object({
|
|
312
|
+
verdict: z2.nativeEnum(Verdict),
|
|
313
|
+
reasoning: z2.string().optional(),
|
|
314
|
+
metCriteria: z2.array(z2.string()),
|
|
315
|
+
unmetCriteria: z2.array(z2.string()),
|
|
316
|
+
error: z2.string().optional()
|
|
395
317
|
});
|
|
396
318
|
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
|
|
397
|
-
type:
|
|
398
|
-
status:
|
|
319
|
+
type: z2.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
|
|
320
|
+
status: z2.nativeEnum(ScenarioRunStatus),
|
|
399
321
|
results: scenarioResultsSchema.optional().nullable()
|
|
400
322
|
});
|
|
401
323
|
var scenarioMessageSnapshotSchema = MessagesSnapshotEventSchema.merge(
|
|
402
324
|
baseScenarioEventSchema.extend({
|
|
403
|
-
type:
|
|
325
|
+
type: z2.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
|
|
404
326
|
})
|
|
405
327
|
);
|
|
406
|
-
var scenarioEventSchema =
|
|
328
|
+
var scenarioEventSchema = z2.discriminatedUnion("type", [
|
|
407
329
|
scenarioRunStartedSchema,
|
|
408
330
|
scenarioRunFinishedSchema,
|
|
409
331
|
scenarioMessageSnapshotSchema
|
|
410
332
|
]);
|
|
411
|
-
var successSchema =
|
|
412
|
-
var errorSchema =
|
|
413
|
-
var stateSchema =
|
|
414
|
-
state:
|
|
415
|
-
messages:
|
|
416
|
-
status:
|
|
333
|
+
var successSchema = z2.object({ success: z2.boolean() });
|
|
334
|
+
var errorSchema = z2.object({ error: z2.string() });
|
|
335
|
+
var stateSchema = z2.object({
|
|
336
|
+
state: z2.object({
|
|
337
|
+
messages: z2.array(z2.any()),
|
|
338
|
+
status: z2.string()
|
|
417
339
|
})
|
|
418
340
|
});
|
|
419
|
-
var runsSchema =
|
|
420
|
-
var eventsSchema =
|
|
341
|
+
var runsSchema = z2.object({ runs: z2.array(z2.string()) });
|
|
342
|
+
var eventsSchema = z2.object({ events: z2.array(scenarioEventSchema) });
|
|
421
343
|
|
|
422
344
|
// src/events/event-reporter.ts
|
|
423
345
|
var EventReporter = class {
|
|
@@ -609,9 +531,9 @@ export {
|
|
|
609
531
|
AgentAdapter,
|
|
610
532
|
UserSimulatorAgentAdapter,
|
|
611
533
|
JudgeAgentAdapter,
|
|
534
|
+
DEFAULT_MAX_TURNS,
|
|
535
|
+
DEFAULT_VERBOSE,
|
|
612
536
|
domain_exports,
|
|
613
|
-
Logger,
|
|
614
|
-
env,
|
|
615
537
|
getProjectConfig,
|
|
616
538
|
generateThreadId,
|
|
617
539
|
generateScenarioRunId,
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Logger
|
|
3
|
+
} from "./chunk-YPJZSK4J.mjs";
|
|
4
|
+
|
|
5
|
+
// src/integrations/vitest/reporter.ts
|
|
6
|
+
import fs from "fs";
|
|
7
|
+
import path from "path";
|
|
8
|
+
import chalk from "chalk";
|
|
9
|
+
var logger = Logger.create("integrations:vitest:reporter");
|
|
10
|
+
function getProjectRoot() {
|
|
11
|
+
return process.cwd();
|
|
12
|
+
}
|
|
13
|
+
var projectRoot = getProjectRoot();
|
|
14
|
+
var logDir = path.join(projectRoot, ".scenario");
|
|
15
|
+
if (!fs.existsSync(logDir)) fs.mkdirSync(logDir);
|
|
16
|
+
function getLogFilePath(testId) {
|
|
17
|
+
return path.join(logDir, `${testId}.log`);
|
|
18
|
+
}
|
|
19
|
+
function getFullTestName(task) {
|
|
20
|
+
let name = task.name;
|
|
21
|
+
let parent = task.suite;
|
|
22
|
+
while (parent) {
|
|
23
|
+
name = `${parent.name} > ${name}`;
|
|
24
|
+
parent = parent.suite;
|
|
25
|
+
}
|
|
26
|
+
return name;
|
|
27
|
+
}
|
|
28
|
+
var VitestReporter = class {
|
|
29
|
+
results = [];
|
|
30
|
+
async onTestCaseResult(test) {
|
|
31
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
32
|
+
const fullName = getFullTestName(test);
|
|
33
|
+
const filePath = getLogFilePath(test.id);
|
|
34
|
+
if (!fs.existsSync(filePath)) {
|
|
35
|
+
logger.warn(
|
|
36
|
+
`No log file found ${filePath} for test ${fullName}`,
|
|
37
|
+
test.id
|
|
38
|
+
);
|
|
39
|
+
return;
|
|
40
|
+
}
|
|
41
|
+
const lines = fs.readFileSync(filePath, "utf-8").split("\n").filter(Boolean);
|
|
42
|
+
const events = lines.map((line) => JSON.parse(line));
|
|
43
|
+
const runs = /* @__PURE__ */ new Map();
|
|
44
|
+
for (const event of events) {
|
|
45
|
+
const runId = event.scenarioRunId ?? "unknown";
|
|
46
|
+
if (!runs.has(runId)) runs.set(runId, []);
|
|
47
|
+
runs.get(runId).push(event);
|
|
48
|
+
}
|
|
49
|
+
for (const [runId, runEvents] of Array.from(runs.entries())) {
|
|
50
|
+
const started = runEvents.find(
|
|
51
|
+
(e) => e.type === "SCENARIO_RUN_STARTED"
|
|
52
|
+
);
|
|
53
|
+
const finished = runEvents.find(
|
|
54
|
+
(e) => e.type === "SCENARIO_RUN_FINISHED"
|
|
55
|
+
);
|
|
56
|
+
const messages = runEvents.filter(
|
|
57
|
+
(e) => e.type === "SCENARIO_MESSAGE_SNAPSHOT"
|
|
58
|
+
);
|
|
59
|
+
this.results.push({
|
|
60
|
+
name: ((_a = started == null ? void 0 : started.metadata) == null ? void 0 : _a.name) ?? fullName,
|
|
61
|
+
status: (finished == null ? void 0 : finished.status) ?? "UNKNOWN",
|
|
62
|
+
duration: started && finished ? finished.timestamp - started.timestamp : 0,
|
|
63
|
+
reasoning: (_b = finished == null ? void 0 : finished.results) == null ? void 0 : _b.reasoning,
|
|
64
|
+
criteria: (finished == null ? void 0 : finished.results) ? `Success Criteria: ${((_c = finished.results.metCriteria) == null ? void 0 : _c.length) ?? 0}/${(((_d = finished.results.metCriteria) == null ? void 0 : _d.length) ?? 0) + (((_e = finished.results.unmetCriteria) == null ? void 0 : _e.length) ?? 0)}` : void 0
|
|
65
|
+
});
|
|
66
|
+
console.log(
|
|
67
|
+
`
|
|
68
|
+
--- Scenario Run: ${((_f = started == null ? void 0 : started.metadata) == null ? void 0 : _f.name) ?? runId} ---`
|
|
69
|
+
);
|
|
70
|
+
if (started) {
|
|
71
|
+
console.log(`Description: ${((_g = started.metadata) == null ? void 0 : _g.description) ?? ""}`);
|
|
72
|
+
}
|
|
73
|
+
if (messages.length) {
|
|
74
|
+
console.log("Chat log:");
|
|
75
|
+
let lastMessageCount = 0;
|
|
76
|
+
for (const msg of messages) {
|
|
77
|
+
const allMessages = msg.messages ?? [];
|
|
78
|
+
for (const m of allMessages.slice(lastMessageCount)) {
|
|
79
|
+
const role = m.role;
|
|
80
|
+
let roleLabel = role;
|
|
81
|
+
if (role.toLowerCase() === "user") roleLabel = chalk.green("User");
|
|
82
|
+
else if (role.toLowerCase() === "agent")
|
|
83
|
+
roleLabel = chalk.cyan("Agent");
|
|
84
|
+
else if (role.toLowerCase() === "assistant")
|
|
85
|
+
roleLabel = chalk.cyan("Assistant");
|
|
86
|
+
else roleLabel = chalk.yellow(role);
|
|
87
|
+
console.log(`${roleLabel}: ${m.content}`);
|
|
88
|
+
}
|
|
89
|
+
lastMessageCount = allMessages.length;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
if (finished) {
|
|
93
|
+
console.log("--- Verdict ---");
|
|
94
|
+
console.log(`Status: ${finished.status}`);
|
|
95
|
+
if (finished.results) {
|
|
96
|
+
console.log(`Verdict: ${finished.results.verdict}`);
|
|
97
|
+
if (finished.results.reasoning)
|
|
98
|
+
console.log(`Reasoning: ${finished.results.reasoning}`);
|
|
99
|
+
if ((_h = finished.results.metCriteria) == null ? void 0 : _h.length)
|
|
100
|
+
console.log(
|
|
101
|
+
`Met criteria: ${finished.results.metCriteria.join(", ")}`
|
|
102
|
+
);
|
|
103
|
+
if ((_i = finished.results.unmetCriteria) == null ? void 0 : _i.length)
|
|
104
|
+
console.log(
|
|
105
|
+
`Unmet criteria: ${finished.results.unmetCriteria.join(", ")}`
|
|
106
|
+
);
|
|
107
|
+
if (finished.results.error)
|
|
108
|
+
console.log(`Error: ${finished.results.error}`);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
console.log("-----------------------------\n");
|
|
112
|
+
}
|
|
113
|
+
fs.unlinkSync(filePath);
|
|
114
|
+
}
|
|
115
|
+
async onTestRunEnd() {
|
|
116
|
+
if (this.results.length === 0) return;
|
|
117
|
+
const total = this.results.length;
|
|
118
|
+
const passed = this.results.filter((r) => r.status === "SUCCESS").length;
|
|
119
|
+
const failed = this.results.filter((r) => r.status !== "SUCCESS").length;
|
|
120
|
+
const successRate = (passed / total * 100).toFixed(1);
|
|
121
|
+
console.log();
|
|
122
|
+
console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
|
|
123
|
+
console.log(`Total Scenarios: ${total}`);
|
|
124
|
+
console.log(chalk.green(`Passed: ${passed}`));
|
|
125
|
+
console.log(chalk.red(`Failed: ${failed}`));
|
|
126
|
+
console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
|
|
127
|
+
this.results.forEach((r, i) => {
|
|
128
|
+
const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
|
|
129
|
+
console.log();
|
|
130
|
+
console.log(
|
|
131
|
+
`${i + 1}. ${r.name} - ${statusColor(r.status)} in ${(r.duration / 1e3).toFixed(2)}s`
|
|
132
|
+
);
|
|
133
|
+
if (r.reasoning) {
|
|
134
|
+
console.log(chalk.greenBright(" Reasoning: ") + r.reasoning);
|
|
135
|
+
}
|
|
136
|
+
if (r.criteria) {
|
|
137
|
+
console.log(chalk.bold(" " + r.criteria));
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
console.log();
|
|
141
|
+
}
|
|
142
|
+
};
|
|
143
|
+
|
|
144
|
+
export {
|
|
145
|
+
VitestReporter
|
|
146
|
+
};
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
// src/config/env.ts
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
|
|
4
|
+
// src/config/log-levels.ts
|
|
5
|
+
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
6
|
+
LogLevel2["ERROR"] = "ERROR";
|
|
7
|
+
LogLevel2["WARN"] = "WARN";
|
|
8
|
+
LogLevel2["INFO"] = "INFO";
|
|
9
|
+
LogLevel2["DEBUG"] = "DEBUG";
|
|
10
|
+
return LogLevel2;
|
|
11
|
+
})(LogLevel || {});
|
|
12
|
+
|
|
13
|
+
// src/config/env.ts
|
|
14
|
+
var envSchema = z.object({
|
|
15
|
+
/**
|
|
16
|
+
* LangWatch API key for event reporting.
|
|
17
|
+
* If not provided, events will not be sent to LangWatch.
|
|
18
|
+
*/
|
|
19
|
+
LANGWATCH_API_KEY: z.string().optional(),
|
|
20
|
+
/**
|
|
21
|
+
* LangWatch endpoint URL for event reporting.
|
|
22
|
+
* Defaults to the production LangWatch endpoint.
|
|
23
|
+
*/
|
|
24
|
+
LANGWATCH_ENDPOINT: z.string().url().default("https://app.langwatch.ai"),
|
|
25
|
+
/**
|
|
26
|
+
* Disables simulation report info messages when set to any non-empty string (even "false" or "0").
|
|
27
|
+
* Useful for CI/CD environments or when you want cleaner output.
|
|
28
|
+
*/
|
|
29
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z.string().optional().transform((val) => Boolean(val)),
|
|
30
|
+
/**
|
|
31
|
+
* Node environment - affects logging and behavior.
|
|
32
|
+
* Defaults to 'development' if not specified.
|
|
33
|
+
*/
|
|
34
|
+
NODE_ENV: z.enum(["development", "production", "test"]).default("development"),
|
|
35
|
+
/**
|
|
36
|
+
* Log level for the scenario package.
|
|
37
|
+
* Defaults to 'info' if not specified.
|
|
38
|
+
*/
|
|
39
|
+
LOG_LEVEL: z.nativeEnum(LogLevel).optional(),
|
|
40
|
+
/**
|
|
41
|
+
* Scenario batch run ID.
|
|
42
|
+
* If not provided, an ID is derived from the parent process ID and ISO week when running in a Vitest/Jest worker; otherwise a fresh ID is generated.
|
|
43
|
+
*/
|
|
44
|
+
SCENARIO_BATCH_RUN_ID: z.string().optional()
|
|
45
|
+
});
|
|
46
|
+
var env = envSchema.parse(process.env);
|
|
47
|
+
|
|
48
|
+
// src/utils/logger.ts
|
|
49
|
+
var Logger = class _Logger {
|
|
50
|
+
constructor(context) {
|
|
51
|
+
this.context = context;
|
|
52
|
+
}
|
|
53
|
+
/**
|
|
54
|
+
* Creates a logger with context (e.g., class name)
|
|
55
|
+
*/
|
|
56
|
+
static create(context) {
|
|
57
|
+
return new _Logger(context);
|
|
58
|
+
}
|
|
59
|
+
getLogLevel() {
|
|
60
|
+
return env.LOG_LEVEL ?? "INFO" /* INFO */;
|
|
61
|
+
}
|
|
62
|
+
getLogLevelIndex(level) {
|
|
63
|
+
return Object.values(LogLevel).indexOf(level);
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Checks if logging should occur based on LOG_LEVEL env var
|
|
67
|
+
*/
|
|
68
|
+
shouldLog(level) {
|
|
69
|
+
const currentLevelIndex = this.getLogLevelIndex(this.getLogLevel());
|
|
70
|
+
const requestedLevelIndex = this.getLogLevelIndex(level);
|
|
71
|
+
return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
|
|
72
|
+
}
|
|
73
|
+
formatMessage(message) {
|
|
74
|
+
return this.context ? `[${this.context}] ${message}` : message;
|
|
75
|
+
}
|
|
76
|
+
error(message, data) {
|
|
77
|
+
if (this.shouldLog("ERROR" /* ERROR */)) {
|
|
78
|
+
const formattedMessage = this.formatMessage(message);
|
|
79
|
+
if (data) {
|
|
80
|
+
console.error(formattedMessage, data);
|
|
81
|
+
} else {
|
|
82
|
+
console.error(formattedMessage);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
warn(message, data) {
|
|
87
|
+
if (this.shouldLog("WARN" /* WARN */)) {
|
|
88
|
+
const formattedMessage = this.formatMessage(message);
|
|
89
|
+
if (data) {
|
|
90
|
+
console.warn(formattedMessage, data);
|
|
91
|
+
} else {
|
|
92
|
+
console.warn(formattedMessage);
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
info(message, data) {
|
|
97
|
+
if (this.shouldLog("INFO" /* INFO */)) {
|
|
98
|
+
const formattedMessage = this.formatMessage(message);
|
|
99
|
+
if (data) {
|
|
100
|
+
console.info(formattedMessage, data);
|
|
101
|
+
} else {
|
|
102
|
+
console.info(formattedMessage);
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
debug(message, data) {
|
|
107
|
+
if (this.shouldLog("DEBUG" /* DEBUG */)) {
|
|
108
|
+
const formattedMessage = this.formatMessage(message);
|
|
109
|
+
if (data) {
|
|
110
|
+
console.log(formattedMessage, data);
|
|
111
|
+
} else {
|
|
112
|
+
console.log(formattedMessage);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
export {
|
|
119
|
+
env,
|
|
120
|
+
Logger
|
|
121
|
+
};
|