@langwatch/scenario 0.2.0-prerelease.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,9 +1,9 @@
1
1
  # Scenario
2
2
 
3
- ![scenario](../assets/scenario-wide.webp)
3
+ ![scenario](https://github.com/langwatch/scenario/raw/refs/heads/main/assets/scenario-wide.webp)
4
4
 
5
5
 
6
- [![npm version](https://badge.fury.io/js/%40getscenario%2Fscenario.svg)](https://badge.fury.io/js/%40getscenario%2Fscenario)
6
+ [![npm version](https://badge.fury.io/js/%40langwatch%2Fscenario.svg)](https://badge.fury.io/js/%40langwatch%2Fscenario)
7
7
 
8
8
  A powerful TypeScript library for testing AI agents in realistic, scripted scenarios.
9
9
 
@@ -21,11 +21,11 @@ Scenario provides a declarative DSL for defining test cases, allowing you to con
21
21
  ## Installation
22
22
 
23
23
  ```bash
24
- pnpm add @getscenario/scenario
24
+ pnpm add @langwatch/scenario
25
25
  # or
26
- npm install @getscenario/scenario
26
+ npm install @langwatch/scenario
27
27
  # or
28
- yarn add @getscenario/scenario
28
+ yarn add @langwatch/scenario
29
29
  ```
30
30
 
31
31
  ## Quick Start
@@ -34,7 +34,7 @@ Create your first scenario test in under a minute.
34
34
 
35
35
  ```typescript
36
36
  // echo.test.ts
37
- import { run, AgentRole, AgentAdapter, user, agent, succeed } from "@getscenario/scenario";
37
+ import scenario, { type AgentAdapter, AgentRole } from "@langwatch/scenario";
38
38
 
39
39
  // 1. Create an adapter for your agent
40
40
  const echoAgent: AgentAdapter = {
@@ -48,14 +48,14 @@ const echoAgent: AgentAdapter = {
48
48
 
49
49
  // 2. Define and run your scenario
50
50
  async function testEchoAgent() {
51
- const result = await run({
51
+ const result = await scenario.run({
52
52
  name: "Echo Agent Test",
53
53
  description: "The agent should echo back the user's message.",
54
54
  agents: [echoAgent],
55
55
  script: [
56
- user("Hello world!"),
57
- agent("You said: Hello world!"), // You can assert the agent's response directly
58
- succeed("Agent correctly echoed the message."),
56
+ scenario.user("Hello world!"),
57
+ scenario.agent("You said: Hello world!"), // You can assert the agent's response directly
58
+ scenario.succeed("Agent correctly echoed the message."),
59
59
  ],
60
60
  });
61
61
 
@@ -77,7 +77,7 @@ Scenario integrates seamlessly with test runners like [Vitest](https://vitest.de
77
77
  // weather.test.ts
78
78
  import { describe, it, expect } from "vitest";
79
79
  import { openai } from "@ai-sdk/openai";
80
- import { run, userSimulatorAgent, AgentRole, AgentAdapter, user, agent, succeed } from "@getscenario/scenario";
80
+ import scenario, { type AgentAdapter, AgentRole } from "@langwatch/scenario";
81
81
  import { generateText, tool } from "ai";
82
82
  import { z } from "zod";
83
83
 
@@ -117,21 +117,21 @@ describe("Weather Agent", () => {
117
117
  };
118
118
 
119
119
  // 3. Define and run your scenario
120
- const result = await run({
120
+ const result = await scenario.run({
121
121
  name: "Checking the weather",
122
122
  description: "The user asks for the weather in a specific city, and the agent should use the weather tool to find it.",
123
123
  agents: [
124
124
  weatherAgent,
125
- userSimulatorAgent({ model: openai("gpt-4.1-mini") }),
125
+ scenario.userSimulatorAgent({ model: openai("gpt-4.1-mini") }),
126
126
  ],
127
127
  script: [
128
- user("What's the weather like in Barcelona?"),
129
- agent(),
128
+ scenario.user("What's the weather like in Barcelona?"),
129
+ scenario.agent(),
130
130
  // You can use inline assertions within your script
131
131
  (state) => {
132
132
  expect(state.hasToolCall("get_current_weather")).toBe(true);
133
133
  },
134
- succeed("Agent correctly used the weather tool."),
134
+ scenario.succeed("Agent correctly used the weather tool."),
135
135
  ],
136
136
  });
137
137
 
@@ -157,6 +157,7 @@ The configuration object for a scenario.
157
157
  - `script?: ScriptStep[]`: An optional array of steps to control the scenario flow. If not provided, the scenario will proceed automatically.
158
158
  - `maxTurns?: number`: The maximum number of conversation turns before a timeout. Defaults to 10.
159
159
  - `verbose?: boolean`: Enables detailed logging during execution.
160
+ - `setId?: string`: (Optional) Groups related scenarios into a test suite ("Simulation Set"). Useful for organizing and tracking scenarios in the UI and reporting. If not provided, the scenario will not be grouped into a set.
160
161
 
161
162
  ### Agents
162
163
 
@@ -212,7 +213,7 @@ You can configure project-wide defaults by creating a `scenario.config.js` or `s
212
213
 
213
214
  ```js
214
215
  // scenario.config.mjs
215
- import { defineConfig } from "@getscenario/scenario/config";
216
+ import { defineConfig } from "@langwatch/scenario/config";
216
217
  import { openai } from "@ai-sdk/openai";
217
218
 
218
219
  export default defineConfig({
@@ -250,6 +251,49 @@ You can control the library's behavior with the following environment variables:
250
251
  - `LANGWATCH_API_KEY`: Your LangWatch API key. This is used as a fallback if `langwatchApiKey` is not set in your config file.
251
252
  - `LANGWATCH_ENDPOINT`: The LangWatch reporting endpoint. This is used as a fallback if `langwatchEndpoint` is not set in your config file.
252
253
 
254
+ ## Grouping Scenarios with setId
255
+
256
+ You can group related scenarios into a set ("Simulation Set") by providing the `setId` option. This is useful for organizing your scenarios in the UI and for reporting in LangWatch.
257
+
258
+ ```typescript
259
+ const result = await scenario.run({
260
+ name: "my first scenario",
261
+ description: "A simple test to see if the agent responds.",
262
+ setId: "my-test-suite", // Group this scenario into a set
263
+ agents: [
264
+ myAgent,
265
+ scenario.userSimulatorAgent(),
266
+ ],
267
+ });
268
+ ```
269
+
270
+ This will group all scenarios with the same `setId` together in the LangWatch UI and reporting tools.
271
+
272
+ - In the Vitest configuration shown under "Vitest Integration" below, the `setupFiles` entry enables Scenario's event logging for each test.
273
+ - The custom `VitestReporter` in that configuration provides detailed scenario test reports in your test output.
274
+
275
+
276
+ ## Vitest Integration
277
+
278
+ To get rich scenario reporting and logging with Vitest, add the Scenario custom reporter and setup file to your `vitest.config.ts`:
279
+
280
+ ```typescript
281
+ // vitest.config.ts
282
+ import { defineConfig } from "vitest/config";
283
+ import VitestReporter from '@langwatch/scenario/integrations/vitest/reporter';
284
+
285
+ export default defineConfig({
286
+ test: {
287
+ testTimeout: 180000, // 3 minutes, or however long you want to wait for the scenario to run
288
+ setupFiles: ['@langwatch/scenario/integrations/vitest/setup'],
289
+ reporters: [
290
+ 'default',
291
+ new VitestReporter(),
292
+ ],
293
+ },
294
+ });
295
+ ```
296
+
253
297
  ## Development
254
298
 
255
299
  This project uses `pnpm` for package management.
@@ -270,3 +314,14 @@ pnpm test
270
314
  ## License
271
315
 
272
316
  MIT
317
+
318
+ ### SCENARIO_BATCH_RUN_ID
319
+
320
+ When running scenario tests, you can set the `SCENARIO_BATCH_RUN_ID` environment variable to uniquely identify a batch of test runs. This is especially useful for grouping results in reporting tools and CI pipelines.
321
+
322
+ Example:
323
+ ```bash
324
+ SCENARIO_BATCH_RUN_ID=my-ci-run-123 pnpm test
325
+ ```
326
+
327
+ If you use the provided test script, a unique batch run ID is generated automatically for each run.
@@ -0,0 +1,9 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __export = (target, all) => {
3
+ for (var name in all)
4
+ __defProp(target, name, { get: all[name], enumerable: true });
5
+ };
6
+
7
+ export {
8
+ __export
9
+ };
@@ -0,0 +1,309 @@
1
+ // src/events/event-bus.ts
2
+ import { concatMap, EMPTY, catchError, Subject } from "rxjs";
3
+
4
+ // src/utils/logger.ts
5
+ var Logger = class _Logger {
6
+ constructor(context) {
7
+ this.context = context;
8
+ }
9
+ /**
10
+ * Creates a logger with context (e.g., class name)
11
+ */
12
+ static create(context) {
13
+ return new _Logger(context);
14
+ }
15
+ /**
16
+ * Checks if logging should occur based on LOG_LEVEL env var
17
+ */
18
+ shouldLog(level) {
19
+ const logLevel = (process.env.SCENARIO_LOG_LEVEL || "").toLowerCase();
20
+ const levels = ["error", "warn", "info", "debug"];
21
+ const currentLevelIndex = levels.indexOf(logLevel);
22
+ const requestedLevelIndex = levels.indexOf(level);
23
+ return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
24
+ }
25
+ formatMessage(message) {
26
+ return this.context ? `[${this.context}] ${message}` : message;
27
+ }
28
+ error(message, data) {
29
+ if (this.shouldLog("error")) {
30
+ const formattedMessage = this.formatMessage(message);
31
+ if (data) {
32
+ console.error(formattedMessage, data);
33
+ } else {
34
+ console.error(formattedMessage);
35
+ }
36
+ }
37
+ }
38
+ warn(message, data) {
39
+ if (this.shouldLog("warn")) {
40
+ const formattedMessage = this.formatMessage(message);
41
+ if (data) {
42
+ console.warn(formattedMessage, data);
43
+ } else {
44
+ console.warn(formattedMessage);
45
+ }
46
+ }
47
+ }
48
+ info(message, data) {
49
+ if (this.shouldLog("info")) {
50
+ const formattedMessage = this.formatMessage(message);
51
+ if (data) {
52
+ console.info(formattedMessage, data);
53
+ } else {
54
+ console.info(formattedMessage);
55
+ }
56
+ }
57
+ }
58
+ debug(message, data) {
59
+ if (this.shouldLog("debug")) {
60
+ const formattedMessage = this.formatMessage(message);
61
+ if (data) {
62
+ console.log(formattedMessage, data);
63
+ } else {
64
+ console.log(formattedMessage);
65
+ }
66
+ }
67
+ }
68
+ };
69
+
70
+ // src/events/event-reporter.ts
71
var EventReporter = class {
  /** URL built from "/api/scenario-events" resolved against `config.endpoint`. */
  eventsEndpoint;
  /** Value sent in the "X-Auth-Token" header; an empty string when no key was given. */
  apiKey;
  logger = new Logger("scenario.events.EventReporter");
  /**
   * @param config - Object providing `endpoint` (base URL) and optionally `apiKey`.
   *
   * Unless the SCENARIO_DISABLE_SIMULATION_REPORT_INFO env var is set to a
   * non-empty value, prints a short notice to the console describing whether
   * an API key is configured.
   */
  constructor(config) {
    this.eventsEndpoint = new URL("/api/scenario-events", config.endpoint);
    this.apiKey = config.apiKey ?? "";
    if (process.env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO) {
      return;
    }
    if (this.apiKey) {
      const keyConfigured = this.apiKey.length > 0 ? "true" : "false";
      console.log(`simulation reporting is enabled, endpoint:(${this.eventsEndpoint}) api_key_configured:(${keyConfigured})`);
      return;
    }
    console.log(
      "\u27A1\uFE0F LangWatch API key not configured, simulations will only output the final result"
    );
    console.log(
      "To visualize the conversations in real time, configure your LangWatch API key (via LANGWATCH_API_KEY, or scenario.config.js)"
    );
  }
  /**
   * Sends `event` as a JSON POST to the events endpoint.
   *
   * The outcome is only logged: a non-OK response and any exception raised
   * while sending or reading the response are reported through the logger
   * instead of being thrown to the caller.
   *
   * @param event - Event object; its `type` is used to tag the log lines.
   */
  async postEvent(event) {
    this.logger.debug(`[${event.type}] Posting event`, { event });
    if (!this.eventsEndpoint) {
      this.logger.warn(
        "No LANGWATCH_ENDPOINT configured, skipping event posting"
      );
      return;
    }
    try {
      const response = await fetch(this.eventsEndpoint.href, {
        method: "POST",
        body: JSON.stringify(event),
        headers: {
          "Content-Type": "application/json",
          "X-Auth-Token": this.apiKey
        }
      });
      this.logger.debug(
        `[${event.type}] Event POST response status: ${response.status}`
      );
      if (!response.ok) {
        const errorText = await response.text();
        this.logger.error(`[${event.type}] Event POST failed:`, {
          status: response.status,
          statusText: response.statusText,
          error: errorText,
          event
        });
        return;
      }
      const payload = await response.json();
      this.logger.debug(`[${event.type}] Event POST response:`, payload);
    } catch (error) {
      this.logger.error(`[${event.type}] Event POST error:`, {
        error,
        event,
        endpoint: this.eventsEndpoint
      });
    }
  }
};
138
+
139
+ // src/events/schema.ts
140
+ import { EventType, MessagesSnapshotEventSchema } from "@ag-ui/core";
141
+ import { z } from "zod";
142
// Possible verdict values of a scenario result (lower-case string values).
var Verdict = {
  SUCCESS: "success",
  FAILURE: "failure",
  INCONCLUSIVE: "inconclusive"
};
// Status values of a scenario run; each value equals its key.
var ScenarioRunStatus = {
  SUCCESS: "SUCCESS",
  ERROR: "ERROR",
  CANCELLED: "CANCELLED",
  IN_PROGRESS: "IN_PROGRESS",
  PENDING: "PENDING",
  FAILED: "FAILED"
};
157
// Identifier schemas: each identifier is validated as a plain string.
var batchRunIdSchema = z.string();
var scenarioRunIdSchema = z.string();
var scenarioIdSchema = z.string();

// Fields shared by every event: an EventType member, a numeric timestamp, and
// an optional, unvalidated raw event.
var baseEventSchema = z.object({
  type: z.nativeEnum(EventType),
  timestamp: z.number(),
  rawEvent: z.any().optional()
});

// Base event plus the scenario identifiers. `scenarioSetId` may be omitted,
// in which case parsing fills in "default".
var baseScenarioEventSchema = baseEventSchema.extend({
  batchRunId: batchRunIdSchema,
  scenarioId: scenarioIdSchema,
  scenarioRunId: scenarioRunIdSchema,
  scenarioSetId: z.string().optional().default("default")
});

// "SCENARIO_RUN_STARTED" event: carries optional name/description metadata.
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
  type: z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
  metadata: z.object({
    name: z.string().optional(),
    description: z.string().optional()
  })
});

// Result payload: verdict, optional reasoning and error text, and the lists of
// met and unmet criteria.
var scenarioResultsSchema = z.object({
  verdict: z.nativeEnum(Verdict),
  reasoning: z.string().optional(),
  metCriteria: z.array(z.string()),
  unmetCriteria: z.array(z.string()),
  error: z.string().optional()
});

// "SCENARIO_RUN_FINISHED" event: a run status plus results that may be
// omitted or null.
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
  type: z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
  status: z.nativeEnum(ScenarioRunStatus),
  results: scenarioResultsSchema.optional().nullable()
});

// "SCENARIO_MESSAGE_SNAPSHOT" event: the imported MessagesSnapshotEventSchema
// merged with the scenario base fields and the literal event type.
var scenarioMessageSnapshotSchema = MessagesSnapshotEventSchema.merge(
  baseScenarioEventSchema.extend({
    type: z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
  })
);

// Union of the three scenario events, discriminated on `type`.
var scenarioEventSchema = z.discriminatedUnion("type", [
  scenarioRunStartedSchema,
  scenarioRunFinishedSchema,
  scenarioMessageSnapshotSchema
]);

// Small wrapper-object schemas.
// NOTE(review): presumably the response payloads of the events API — confirm against callers.
var successSchema = z.object({ success: z.boolean() });
var errorSchema = z.object({ error: z.string() });
var stateSchema = z.object({
  state: z.object({
    messages: z.array(z.any()),
    status: z.string()
  })
});
var runsSchema = z.object({ runs: z.array(z.string()) });
var eventsSchema = z.object({ events: z.array(scenarioEventSchema) });
210
+
211
+ // src/events/event-bus.ts
212
var EventBus = class _EventBus {
  /** Every bus constructed so far. Nothing in this class removes entries. */
  static registry = /* @__PURE__ */ new Set();
  /** Subject that all published events flow through. */
  events$ = new Subject();
  /** Reporter used to post each processed event. */
  eventReporter;
  /** Promise created by the first listen() call; null until then. */
  processingPromise = null;
  logger = new Logger("scenario.events.EventBus");
  /** Callbacks invoked with each newly constructed bus. */
  static globalListeners = [];
  /**
   * Creates the bus, adds it to the static registry, and notifies every
   * global listener registered so far.
   *
   * @param config - Passed unchanged to the EventReporter constructor.
   */
  constructor(config) {
    this.eventReporter = new EventReporter(config);
    _EventBus.registry.add(this);
    for (const listener of _EventBus.globalListeners) {
      listener(this);
    }
  }
  /**
   * Returns the set of all buses constructed so far (the live set, not a copy).
   */
  static getAllBuses() {
    return _EventBus.registry;
  }
  /**
   * Registers a callback that is invoked with each bus constructed afterwards.
   * Buses that already exist are not replayed to the new listener.
   */
  static addGlobalListener(listener) {
    _EventBus.globalListeners.push(listener);
  }
  /**
   * Publishes an event into the processing pipeline.
   */
  publish(event) {
    this.logger.debug(`[${event.type}] Publishing event`, {
      event
    });
    this.events$.next(event);
  }
  /**
   * Begins listening for and processing events.
   *
   * Events are posted through the reporter one at a time, in publish order.
   * Repeated calls return the same promise.
   *
   * @returns A promise that resolves when a RUN_FINISHED event has been fully
   * processed, or when the event stream completes (for example after drain()),
   * whichever happens first.
   */
  listen() {
    this.logger.debug("Listening for events");
    if (this.processingPromise) {
      return this.processingPromise;
    }
    this.processingPromise = new Promise((resolve, reject) => {
      this.events$.pipe(
        concatMap(async (event) => {
          this.logger.debug(`[${event.type}] Processing event`, {
            event
          });
          await this.eventReporter.postEvent(event);
          return event;
        }),
        catchError((error) => {
          this.logger.error("Error in event stream:", error);
          return EMPTY;
        })
      ).subscribe({
        next: (event) => {
          this.logger.debug(`[${event.type}] Event processed`, {
            event
          });
          if (event.type === "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */) {
            resolve();
          }
        },
        error: (error) => {
          this.logger.error("Error in event stream:", error);
          reject(error);
        },
        // Without this handler the promise could only settle on RUN_FINISHED,
        // so drain() would wait forever when that event was never published
        // or when catchError ended the stream early. Resolving a promise that
        // has already settled is a no-op, so the normal path is unaffected.
        complete: () => {
          this.logger.debug("Event stream completed");
          resolve();
        }
      });
    });
    return this.processingPromise;
  }
  /**
   * Stops accepting new events and drains the processing queue.
   *
   * Completes the subject and, if listen() was called, waits for its promise.
   */
  async drain() {
    this.logger.debug("Draining event stream");
    this.events$.complete();
    if (this.processingPromise) {
      await this.processingPromise;
    }
  }
  /**
   * Subscribes to an event stream.
   * @param source$ - The event stream to subscribe to.
   * @returns The subscription created by forwarding `source$` into this bus.
   */
  subscribeTo(source$) {
    this.logger.debug("Subscribing to event stream");
    return source$.subscribe(this.events$);
  }
  /**
   * Expose the events$ observable for external subscription (read-only).
   */
  get eventsObservable() {
    return this.events$.asObservable();
  }
};
305
+
306
+ export {
307
+ Logger,
308
+ EventBus
309
+ };