npm - @langwatch/scenario - Versions diffs - 0.2.0-prerelease.0 → 0.2.0 - Mend

@langwatch/scenario 0.2.0-prerelease.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +72 -17
package/dist/chunk-7P6ASYW6.mjs +9 -0
package/dist/chunk-ORWSJC5F.mjs +309 -0
package/dist/index.d.mts +642 -515
package/dist/index.d.ts +642 -515
package/dist/index.js +977 -907
package/dist/index.mjs +845 -1073
package/dist/integrations/vitest/reporter.d.mts +9 -0
package/dist/integrations/vitest/reporter.d.ts +9 -0
package/dist/integrations/vitest/reporter.js +168 -0
package/dist/integrations/vitest/reporter.mjs +139 -0
package/dist/integrations/vitest/setup.d.mts +2 -0
package/dist/integrations/vitest/setup.d.ts +2 -0
package/dist/integrations/vitest/setup.js +377 -0
package/dist/integrations/vitest/setup.mjs +51 -0
package/package.json +17 -5

package/README.md CHANGED Viewed

@@ -1,9 +1,9 @@
 # Scenario
-![scenario](../assets/scenario-wide.webp)
+![scenario](https://github.com/langwatch/scenario/raw/refs/heads/main/assets/scenario-wide.webp)
-[![npm version](https://badge.fury.io/js/%40getscenario%2Fscenario.svg)](https://badge.fury.io/js/%40getscenario%2Fscenario)
+[![npm version](https://badge.fury.io/js/%40langwatch%2Fscenario.svg)](https://badge.fury.io/js/%40langwatch%2Fscenario)
 A powerful TypeScript library for testing AI agents in realistic, scripted scenarios.
@@ -21,11 +21,11 @@ Scenario provides a declarative DSL for defining test cases, allowing you to con
 ## Installation
 ```bash
-pnpm add @getscenario/scenario
+pnpm add @langwatch/scenario
 # or
-npm install @getscenario/scenario
+npm install @langwatch/scenario
 # or
-yarn add @getscenario/scenario
+yarn add @langwatch/scenario
 ```
 ## Quick Start
@@ -34,7 +34,7 @@ Create your first scenario test in under a minute.
 ```typescript
 // echo.test.ts
-import { run, AgentRole, AgentAdapter, user, agent, succeed } from "@getscenario/scenario";
+import scenario, { type AgentAdapter, AgentRole } from "@langwatch/scenario";
 // 1. Create an adapter for your agent
 const echoAgent: AgentAdapter = {
@@ -48,14 +48,14 @@ const echoAgent: AgentAdapter = {
 // 2. Define and run your scenario
 async function testEchoAgent() {
-  const result = await run({
+  const result = await scenario.run({
     name: "Echo Agent Test",
     description: "The agent should echo back the user's message.",
     agents: [echoAgent],
     script: [
-      user("Hello world!"),
-      agent("You said: Hello world!"), // You can assert the agent's response directly
-      succeed("Agent correctly echoed the message."),
+      scenario.user("Hello world!"),
+      scenario.agent("You said: Hello world!"), // You can assert the agent's response directly
+      scenario.succeed("Agent correctly echoed the message."),
     ],
   });
@@ -77,7 +77,7 @@ Scenario integrates seamlessly with test runners like [Vitest](https://vitest.de
 // weather.test.ts
 import { describe, it, expect } from "vitest";
 import { openai } from "@ai-sdk/openai";
-import { run, userSimulatorAgent, AgentRole, AgentAdapter, user, agent, succeed } from "@getscenario/scenario";
+import scenario, { type AgentAdapter, AgentRole } from "@langwatch/scenario";
 import { generateText, tool } from "ai";
 import { z } from "zod";
@@ -117,21 +117,21 @@ describe("Weather Agent", () => {
     };
     // 3. Define and run your scenario
-    const result = await run({
+    const result = await scenario.run({
       name: "Checking the weather",
       description: "The user asks for the weather in a specific city, and the agent should use the weather tool to find it.",
       agents: [
         weatherAgent,
-        userSimulatorAgent({ model: openai("gpt-4.1-mini") }),
+        scenario.userSimulatorAgent({ model: openai("gpt-4.1-mini") }),
       ],
       script: [
-        user("What's the weather like in Barcelona?"),
-        agent(),
+        scenario.user("What's the weather like in Barcelona?"),
+        scenario.agent(),
         // You can use inline assertions within your script
         (state) => {
           expect(state.hasToolCall("get_current_weather")).toBe(true);
         },
-        succeed("Agent correctly used the weather tool."),
+        scenario.succeed("Agent correctly used the weather tool."),
       ],
     });
@@ -157,6 +157,7 @@ The configuration object for a scenario.
 - `script?: ScriptStep[]`: An optional array of steps to control the scenario flow. If not provided, the scenario will proceed automatically.
 - `maxTurns?: number`: The maximum number of conversation turns before a timeout. Defaults to 10.
 - `verbose?: boolean`: Enables detailed logging during execution.
+- `setId?: string`: (Optional) Groups related scenarios into a test suite ("Simulation Set"). Useful for organizing and tracking scenarios in the UI and reporting. If not provided, the scenario will not be grouped into a set.
 ### Agents
@@ -212,7 +213,7 @@ You can configure project-wide defaults by creating a `scenario.config.js` or `s
 ```js
 // scenario.config.mjs
-import { defineConfig } from "@getscenario/scenario/config";
+import { defineConfig } from "@langwatch/scenario/config";
 import { openai } from "@ai-sdk/openai";
 export default defineConfig({
@@ -250,6 +251,49 @@ You can control the library's behavior with the following environment variables:
 - `LANGWATCH_API_KEY`: Your LangWatch API key. This is used as a fallback if `langwatchApiKey` is not set in your config file.
 - `LANGWATCH_ENDPOINT`: The LangWatch reporting endpoint. This is used as a fallback if `langwatchEndpoint` is not set in your config file.
+## Grouping Scenarios with setId
+You can group related scenarios into a set ("Simulation Set") by providing the `setId` option. This is useful for organizing your scenarios in the UI and for reporting in LangWatch.
+```typescript
+const result = await scenario.run({
+  name: "my first scenario",
+  description: "A simple test to see if the agent responds.",
+  setId: "my-test-suite", // Group this scenario into a set
+  agents: [
+    myAgent,
+    scenario.userSimulatorAgent(),
+  ],
+});
+```
+This will group all scenarios with the same `setId` together in the LangWatch UI and reporting tools.
+- In the Vitest configuration shown under "Vitest Integration" below, the `setupFiles` entry enables Scenario's event logging for each test.
+- The custom `VitestReporter` in that configuration provides detailed scenario test reports in your test output.
+## Vitest Integration
+To get rich scenario reporting and logging with Vitest, add the Scenario custom reporter and setup file to your `vitest.config.ts`:
+```typescript
+// vitest.config.ts
+import { defineConfig } from "vitest/config";
+import VitestReporter from '@langwatch/scenario/integrations/vitest/reporter';
+export default defineConfig({
+  test: {
+    testTimeout: 180000, // 3 minutes, or however long you want to wait for the scenario to run
+    setupFiles: ['@langwatch/scenario/integrations/vitest/setup'],
+    reporters: [
+      'default',
+      new VitestReporter(),
+    ],
+  },
+});
+```
 ## Development
 This project uses `pnpm` for package management.
@@ -270,3 +314,14 @@ pnpm test
 ## License
 MIT
+### SCENARIO_BATCH_RUN_ID
+When running scenario tests, you can set the `SCENARIO_BATCH_RUN_ID` environment variable to uniquely identify a batch of test runs. This is especially useful for grouping results in reporting tools and CI pipelines.
+Example:
+```bash
+SCENARIO_BATCH_RUN_ID=my-ci-run-123 pnpm test
+```
+If you use the provided test script, a unique batch run ID is generated automatically for each run.

package/dist/chunk-7P6ASYW6.mjs ADDED Viewed

@@ -0,0 +1,9 @@
+var __defProp = Object.defineProperty; // local alias for Object.defineProperty
+var __export = (target, all) => { // defines every enumerable key of `all` on `target` as an enumerable getter
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true }); // all[name] itself is installed as the getter function
+};
+export {
+  __export
+};

package/dist/chunk-ORWSJC5F.mjs ADDED Viewed

@@ -0,0 +1,309 @@
+// src/events/event-bus.ts
+import { concatMap, EMPTY, catchError, Subject } from "rxjs";
+// src/utils/logger.ts
+var Logger = class _Logger { // console logger gated by SCENARIO_LOG_LEVEL; prefixes messages with an optional context
+  constructor(context) {
+    this.context = context;
+  }
+  /**
+   * Creates a logger with context (e.g., class name)
+   */
+  static create(context) {
+    return new _Logger(context);
+  }
+  /**
+   * Checks if logging should occur based on the SCENARIO_LOG_LEVEL env var
+   * (compared case-insensitively). Levels are ordered error, warn, info, debug;
+   * a message is logged when its level is at or before the configured one.
+   * If the variable is unset or is not one of those four names, this returns
+   * false for every level, so nothing is logged (not even errors).
+   */
+  shouldLog(level) {
+    const logLevel = (process.env.SCENARIO_LOG_LEVEL || "").toLowerCase();
+    const levels = ["error", "warn", "info", "debug"];
+    const currentLevelIndex = levels.indexOf(logLevel);
+    const requestedLevelIndex = levels.indexOf(level);
+    return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
+  }
+  formatMessage(message) { // prepends "[context] " when a context was given
+    return this.context ? `[${this.context}] ${message}` : message;
+  }
+  error(message, data) { // writes via console.error when the "error" level is enabled
+    if (this.shouldLog("error")) {
+      const formattedMessage = this.formatMessage(message);
+      if (data) { // truthiness check: falsy data (undefined, null, 0, "", false) is left out of the output
+        console.error(formattedMessage, data);
+      } else {
+        console.error(formattedMessage);
+      }
+    }
+  }
+  warn(message, data) { // same shape as error(), via console.warn
+    if (this.shouldLog("warn")) {
+      const formattedMessage = this.formatMessage(message);
+      if (data) {
+        console.warn(formattedMessage, data);
+      } else {
+        console.warn(formattedMessage);
+      }
+    }
+  }
+  info(message, data) { // same shape as error(), via console.info
+    if (this.shouldLog("info")) {
+      const formattedMessage = this.formatMessage(message);
+      if (data) {
+        console.info(formattedMessage, data);
+      } else {
+        console.info(formattedMessage);
+      }
+    }
+  }
+  debug(message, data) { // same shape as error(); note it writes with console.log, not console.debug
+    if (this.shouldLog("debug")) {
+      const formattedMessage = this.formatMessage(message);
+      if (data) {
+        console.log(formattedMessage, data);
+      } else {
+        console.log(formattedMessage);
+      }
+    }
+  }
+};
+// src/events/event-reporter.ts
+var EventReporter = class { // posts scenario events as JSON to <endpoint>/api/scenario-events
+  eventsEndpoint;
+  apiKey;
+  logger = new Logger("scenario.events.EventReporter");
+  constructor(config) {
+    this.eventsEndpoint = new URL("/api/scenario-events", config.endpoint); // resolves the fixed events path against config.endpoint
+    this.apiKey = config.apiKey ?? ""; // a missing key is stored as an empty string
+    if (!process.env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO) { // any non-empty value of this env var suppresses the startup messages below
+      if (!this.apiKey) {
+        console.log(
+          "\u27A1\uFE0F  LangWatch API key not configured, simulations will only output the final result"
+        );
+        console.log(
+          "To visualize the conversations in real time, configure your LangWatch API key (via LANGWATCH_API_KEY, or scenario.config.js)"
+        );
+      } else { // apiKey is non-empty in this branch, so api_key_configured always prints "true"
+        console.log(`simulation reporting is enabled, endpoint:(${this.eventsEndpoint}) api_key_configured:(${this.apiKey.length > 0 ? "true" : "false"})`);
+      }
+    }
+  }
+  /**
+   * Posts an event to the configured endpoint.
+   * Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
+   * The event is sent as a JSON body via POST, with the API key in the
+   * X-Auth-Token header. Non-OK responses and thrown errors are reported
+   * through the logger at error level; the method then returns normally.
+   */
+  async postEvent(event) {
+    this.logger.debug(`[${event.type}] Posting event`, {
+      event
+    });
+    if (!this.eventsEndpoint) { // NOTE(review): the constructor always assigns a URL object, so this guard looks unreachable — confirm
+      this.logger.warn(
+        "No LANGWATCH_ENDPOINT configured, skipping event posting"
+      );
+      return;
+    }
+    try {
+      const response = await fetch(this.eventsEndpoint.href, {
+        method: "POST",
+        body: JSON.stringify(event),
+        headers: {
+          "Content-Type": "application/json",
+          "X-Auth-Token": this.apiKey
+        }
+      });
+      this.logger.debug(
+        `[${event.type}] Event POST response status: ${response.status}`
+      );
+      if (response.ok) {
+        const data = await response.json(); // if parsing the body throws, the catch below logs it
+        this.logger.debug(`[${event.type}] Event POST response:`, data);
+      } else {
+        const errorText = await response.text();
+        this.logger.error(`[${event.type}] Event POST failed:`, {
+          status: response.status,
+          statusText: response.statusText,
+          error: errorText,
+          event
+        });
+      }
+    } catch (error) { // network or parsing failures are logged, never rethrown
+      this.logger.error(`[${event.type}] Event POST error:`, {
+        error,
+        event,
+        endpoint: this.eventsEndpoint
+      });
+    }
+  }
+};
+// src/events/schema.ts
+import { EventType, MessagesSnapshotEventSchema } from "@ag-ui/core";
+import { z } from "zod";
+var Verdict = /* @__PURE__ */ ((Verdict2) => { // string enum object: SUCCESS / FAILURE / INCONCLUSIVE map to lowercase values
+  Verdict2["SUCCESS"] = "success";
+  Verdict2["FAILURE"] = "failure";
+  Verdict2["INCONCLUSIVE"] = "inconclusive";
+  return Verdict2;
+})(Verdict || {});
+var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => { // string enum object whose values equal their keys
+  ScenarioRunStatus2["SUCCESS"] = "SUCCESS";
+  ScenarioRunStatus2["ERROR"] = "ERROR";
+  ScenarioRunStatus2["CANCELLED"] = "CANCELLED";
+  ScenarioRunStatus2["IN_PROGRESS"] = "IN_PROGRESS";
+  ScenarioRunStatus2["PENDING"] = "PENDING";
+  ScenarioRunStatus2["FAILED"] = "FAILED";
+  return ScenarioRunStatus2;
+})(ScenarioRunStatus || {});
+var baseEventSchema = z.object({ // fields shared by every event schema built below
+  type: z.nativeEnum(EventType),
+  timestamp: z.number(),
+  rawEvent: z.any().optional()
+});
+var batchRunIdSchema = z.string();
+var scenarioRunIdSchema = z.string();
+var scenarioIdSchema = z.string();
+var baseScenarioEventSchema = baseEventSchema.extend({ // adds the batch / scenario / run identifiers to the base event
+  batchRunId: batchRunIdSchema,
+  scenarioId: scenarioIdSchema,
+  scenarioRunId: scenarioRunIdSchema,
+  scenarioSetId: z.string().optional().default("default") // omitted set id parses as "default"
+});
+var scenarioRunStartedSchema = baseScenarioEventSchema.extend({ // event with type "SCENARIO_RUN_STARTED"
+  type: z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
+  metadata: z.object({
+    name: z.string().optional(),
+    description: z.string().optional()
+  })
+});
+var scenarioResultsSchema = z.object({ // result payload carried by the run-finished event
+  verdict: z.nativeEnum(Verdict),
+  reasoning: z.string().optional(),
+  metCriteria: z.array(z.string()),
+  unmetCriteria: z.array(z.string()),
+  error: z.string().optional()
+});
+var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({ // event with type "SCENARIO_RUN_FINISHED"
+  type: z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
+  status: z.nativeEnum(ScenarioRunStatus),
+  results: scenarioResultsSchema.optional().nullable() // results may be absent or null
+});
+var scenarioMessageSnapshotSchema = MessagesSnapshotEventSchema.merge( // @ag-ui/core snapshot schema merged with the scenario identifiers
+  baseScenarioEventSchema.extend({
+    type: z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
+  })
+);
+var scenarioEventSchema = z.discriminatedUnion("type", [ // union of the three scenario events, selected by the `type` literal
+  scenarioRunStartedSchema,
+  scenarioRunFinishedSchema,
+  scenarioMessageSnapshotSchema
+]);
+var successSchema = z.object({ success: z.boolean() }); // NOTE(review): this and the schemas below are not referenced or exported in the visible part of this chunk
+var errorSchema = z.object({ error: z.string() });
+var stateSchema = z.object({
+  state: z.object({
+    messages: z.array(z.any()),
+    status: z.string()
+  })
+});
+var runsSchema = z.object({ runs: z.array(z.string()) });
+var eventsSchema = z.object({ events: z.array(scenarioEventSchema) });
+// src/events/event-bus.ts
+var EventBus = class _EventBus { // queues published events on an RxJS Subject and posts them through an EventReporter
+  static registry = /* @__PURE__ */ new Set(); // every constructed bus; NOTE(review): nothing in this chunk removes entries
+  events$ = new Subject();
+  eventReporter;
+  processingPromise = null; // set by the first listen() call and reused afterwards
+  logger = new Logger("scenario.events.EventBus");
+  static globalListeners = [];
+  constructor(config) {
+    this.eventReporter = new EventReporter(config);
+    _EventBus.registry.add(this);
+    for (const listener of _EventBus.globalListeners) { // notify each registered global listener about the new bus
+      listener(this);
+    }
+  }
+  static getAllBuses() { // returns the live registry Set itself, not a copy
+    return _EventBus.registry;
+  }
+  static addGlobalListener(listener) { // listeners are only invoked from the constructor, i.e. for buses created after registration
+    _EventBus.globalListeners.push(listener);
+  }
+  /**
+   * Publishes an event into the processing pipeline.
+   */
+  publish(event) {
+    this.logger.debug(`[${event.type}] Publishing event`, {
+      event
+    });
+    this.events$.next(event);
+  }
+  /**
+   * Begins listening for and processing events.
+   * Returns a promise that resolves when a RUN_FINISHED event is fully processed.
+   * Events go through concatMap, so each one is posted only after the previous
+   * post has finished. Calling listen() again returns the same promise rather
+   * than subscribing a second time.
+   * NOTE(review): the subscription has no `complete` handler, so if the stream
+   * completes without a RUN_FINISHED event this promise appears never to settle.
+   */
+  listen() {
+    this.logger.debug("Listening for events");
+    if (this.processingPromise) {
+      return this.processingPromise;
+    }
+    this.processingPromise = new Promise((resolve, reject) => {
+      this.events$.pipe(
+        concatMap(async (event) => {
+          this.logger.debug(`[${event.type}] Processing event`, {
+            event
+          });
+          await this.eventReporter.postEvent(event);
+          return event;
+        }),
+        catchError((error) => { // an upstream error is logged and replaced with EMPTY instead of reaching the subscriber
+          this.logger.error("Error in event stream:", error);
+          return EMPTY;
+        })
+      ).subscribe({
+        next: (event) => {
+          this.logger.debug(`[${event.type}] Event processed`, {
+            event
+          });
+          if (event.type === "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */) {
+            resolve();
+          }
+        },
+        error: (error) => {
+          this.logger.error("Error in event stream:", error);
+          reject(error);
+        }
+      });
+    });
+    return this.processingPromise;
+  }
+  /**
+   * Stops accepting new events and drains the processing queue.
+   * Completes the subject, then awaits the promise created by listen() if
+   * there is one.
+   * NOTE(review): that promise only resolves on RUN_FINISHED, so drain() may
+   * wait indefinitely when no such event was published — confirm callers
+   * always emit one before draining.
+   */
+  async drain() {
+    this.logger.debug("Draining event stream");
+    this.events$.complete();
+    if (this.processingPromise) {
+      await this.processingPromise;
+    }
+  }
+  /**
+   * Subscribes to an event stream.
+   * @param source$ - The event stream to subscribe to.
+   * Returns whatever source$.subscribe returns. NOTE(review): events$ is passed
+   * as the observer, so the source's complete/error presumably propagate to
+   * this bus as well — confirm against RxJS Subject semantics.
+   */
+  subscribeTo(source$) {
+    this.logger.debug("Subscribing to event stream");
+    return source$.subscribe(this.events$);
+  }
+  /**
+   * Expose the events$ observable for external subscription (read-only).
+   */
+  get eventsObservable() {
+    return this.events$.asObservable();
+  }
+};
+export {
+  Logger,
+  EventBus
+};