npm - promptfoo - Versions diffs - 0.17.9 → 0.18.1 - Mend

promptfoo 0.17.9 → 0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)

package/README.md +5 -5
package/dist/package.json +1 -1
package/dist/src/assertions.d.ts.map +1 -1
package/dist/src/assertions.js +97 -42
package/dist/src/assertions.js.map +1 -1
package/dist/src/evaluator.d.ts.map +1 -1
package/dist/src/evaluator.js +35 -7
package/dist/src/evaluator.js.map +1 -1
package/dist/src/index.d.ts.map +1 -1
package/dist/src/index.js +3 -0
package/dist/src/index.js.map +1 -1
package/dist/src/main.js +9 -0
package/dist/src/main.js.map +1 -1
package/dist/src/providers/llama.d.ts +30 -0
package/dist/src/providers/llama.d.ts.map +1 -0
package/dist/src/providers/llama.js +67 -0
package/dist/src/providers/llama.js.map +1 -0
package/dist/src/providers.d.ts +2 -2
package/dist/src/providers.d.ts.map +1 -1
package/dist/src/providers.js +21 -2
package/dist/src/providers.js.map +1 -1
package/dist/src/table.js +2 -2
package/dist/src/table.js.map +1 -1
package/dist/src/types.d.ts +11 -4
package/dist/src/types.d.ts.map +1 -1
package/dist/src/util.d.ts.map +1 -1
package/dist/src/util.js +5 -2
package/dist/src/util.js.map +1 -1
package/package.json +1 -1
package/src/assertions.ts +102 -49
package/src/evaluator.ts +33 -4
package/src/index.ts +6 -1
package/src/main.ts +14 -0
package/src/providers/llama.ts +95 -0
package/src/providers.ts +27 -5
package/src/table.ts +2 -2
package/src/types.ts +25 -5
package/src/util.ts +12 -2
package/src/web/client/package-lock.json +0 -5726

package/src/assertions.ts CHANGED Viewed

@@ -99,12 +99,21 @@ export async function runAssertion(
     type: baseType,
   });
+  //render assertion values
+  let renderedValue = assertion.value;
+  // renderString for assertion values
+  if (renderedValue && typeof renderedValue === 'string') {
+    renderedValue = nunjucks.renderString(renderedValue, test.vars || {});
+  } else if (renderedValue && Array.isArray(renderedValue)) {
+    renderedValue = renderedValue.map((v) => nunjucks.renderString(v, test.vars || {}));
+  }
   if (baseType === 'equals') {
-    pass = assertion.value === output;
+    pass = renderedValue === output;
     return {
       pass,
       score: pass ? 1 : 0,
-      reason: pass ? 'Assertion passed' : `Expected output "${assertion.value}"`,
+      reason: pass ? 'Assertion passed' : `Expected output "${renderedValue}"`,
     };
   }
@@ -123,103 +132,99 @@ export async function runAssertion(
   }
   if (baseType === 'contains') {
-    invariant(assertion.value, '"contains" assertion type must have a string or number value');
+    invariant(renderedValue, '"contains" assertion type must have a string or number value');
     invariant(
-      typeof assertion.value === 'string' || typeof assertion.value === 'number',
+      typeof renderedValue === 'string' || typeof renderedValue === 'number',
       '"contains" assertion type must have a string or number value',
     );
-    pass = output.includes(String(assertion.value)) !== inverse;
+    pass = output.includes(String(renderedValue)) !== inverse;
     return {
       pass,
       score: pass ? 1 : 0,
       reason: pass
         ? 'Assertion passed'
-        : `Expected output to ${inverse ? 'not ' : ''}contain "${assertion.value}"`,
+        : `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
     };
   }
   if (baseType === 'contains-any') {
-    invariant(assertion.value, '"contains-any" assertion type must have a value');
+    invariant(renderedValue, '"contains-any" assertion type must have a value');
     invariant(
-      Array.isArray(assertion.value),
+      Array.isArray(renderedValue),
       '"contains-any" assertion type must have an array value',
     );
-    pass = assertion.value.some((value) => output.includes(value)) !== inverse;
+    pass = renderedValue.some((value) => output.includes(value)) !== inverse;
     return {
       pass,
       score: pass ? 1 : 0,
       reason: pass
         ? 'Assertion passed'
-        : `Expected output to ${inverse ? 'not ' : ''}contain one of "${assertion.value.join(
-            ', ',
-          )}"`,
+        : `Expected output to ${inverse ? 'not ' : ''}contain one of "${renderedValue.join(', ')}"`,
     };
   }
   if (baseType === 'contains-all') {
-    invariant(assertion.value, '"contains-all" assertion type must have a value');
+    invariant(renderedValue, '"contains-all" assertion type must have a value');
     invariant(
-      Array.isArray(assertion.value),
+      Array.isArray(renderedValue),
       '"contains-all" assertion type must have an array value',
     );
-    pass = assertion.value.every((value) => output.includes(value)) !== inverse;
+    pass = renderedValue.every((value) => output.includes(value)) !== inverse;
     return {
       pass,
       score: pass ? 1 : 0,
       reason: pass
         ? 'Assertion passed'
-        : `Expected output to ${inverse ? 'not ' : ''}contain all of "${assertion.value.join(
-            ', ',
-          )}"`,
+        : `Expected output to ${inverse ? 'not ' : ''}contain all of "${renderedValue.join(', ')}"`,
     };
   }
   if (baseType === 'regex') {
-    invariant(assertion.value, '"regex" assertion type must have a string value');
+    invariant(renderedValue, '"regex" assertion type must have a string value');
     invariant(
-      typeof assertion.value === 'string',
+      typeof renderedValue === 'string',
       '"contains" assertion type must have a string value',
     );
-    const regex = new RegExp(assertion.value);
+    const regex = new RegExp(renderedValue);
     pass = regex.test(output) !== inverse;
     return {
       pass,
       score: pass ? 1 : 0,
       reason: pass
         ? 'Assertion passed'
-        : `Expected output to ${inverse ? 'not ' : ''}match regex "${assertion.value}"`,
+        : `Expected output to ${inverse ? 'not ' : ''}match regex "${renderedValue}"`,
     };
   }
   if (baseType === 'icontains') {
-    invariant(assertion.value, '"icontains" assertion type must have a string or number value');
+    invariant(renderedValue, '"icontains" assertion type must have a string or number value');
     invariant(
-      typeof assertion.value === 'string' || typeof assertion.value === 'number',
+      typeof renderedValue === 'string' || typeof renderedValue === 'number',
       '"icontains" assertion type must have a string or number value',
     );
-    pass = output.toLowerCase().includes(String(assertion.value).toLowerCase()) !== inverse;
+    pass = output.toLowerCase().includes(String(renderedValue).toLowerCase()) !== inverse;
     return {
       pass,
       score: pass ? 1 : 0,
       reason: pass
         ? 'Assertion passed'
-        : `Expected output to ${inverse ? 'not ' : ''}contain "${assertion.value}"`,
+        : `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
     };
   }
   if (baseType === 'starts-with') {
-    invariant(assertion.value, '"starts-with" assertion type must have a string value');
+    invariant(renderedValue, '"starts-with" assertion type must have a string value');
     invariant(
-      typeof assertion.value === 'string',
+      typeof renderedValue === 'string',
       '"starts-with" assertion type must have a string value',
     );
-    pass = output.startsWith(String(assertion.value)) !== inverse;
+    pass = output.startsWith(String(renderedValue)) !== inverse;
     return {
       pass,
       score: pass ? 1 : 0,
       reason: pass
         ? 'Assertion passed'
-        : `Expected output to ${inverse ? 'not ' : ''}start with "${assertion.value}"`,
+        : `Expected output to ${inverse ? 'not ' : ''}start with "${renderedValue}"`,
     };
   }
@@ -234,12 +239,16 @@ export async function runAssertion(
     };
   }
+  const context = {
+    vars: test.vars || {},
+  };
   if (baseType === 'javascript') {
     try {
-      const customFunction = new Function('output', 'context', `return ${assertion.value}`);
-      const context = {
-        vars: test.vars || {},
-      };
+      if (typeof assertion.value === 'function') {
+        return assertion.value(output, test, assertion);
+      }
+      const customFunction = new Function('output', 'context', `return ${renderedValue}`);
       const result = customFunction(output, context) as any;
       if (typeof result === 'boolean') {
         pass = result !== inverse;
@@ -255,7 +264,7 @@ export async function runAssertion(
         pass: false,
         score: 0,
         reason: `Custom function threw error: ${(err as Error).message}
-${assertion.value}`,
+${renderedValue}`,
       };
     }
     return {
@@ -264,41 +273,82 @@ ${assertion.value}`,
       reason: pass
         ? 'Assertion passed'
         : `Custom function returned ${inverse ? 'true' : 'false'}
+${renderedValue}`,
+    };
+  }
+  if (baseType === 'python') {
+    try {
+      const { execSync } = require('child_process');
+      const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
+      const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
+      const result = execSync(
+        `python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
+      )
+        .toString()
+        .trim();
+      if (result === 'true') {
+        pass = true;
+        score = 1.0;
+      } else if (result === 'false') {
+        pass = false;
+        score = 0.0;
+      } else if (result.startsWith('{')) {
+        return JSON.parse(result);
+      } else {
+        pass = true;
+        score = parseFloat(result);
+        if (isNaN(score)) {
+          throw new Error(
+            'Python code must return a boolean, number, or {pass, score, reason} object',
+          );
+        }
+      }
+    } catch (err) {
+      return {
+        pass: false,
+        score: 0,
+        reason: `Python code execution failed: ${(err as Error).message}`,
+      };
+    }
+    return {
+      pass,
+      score,
+      reason: pass
+        ? 'Assertion passed'
+        : `Python code returned ${pass ? 'true' : 'false'}
 ${assertion.value}`,
     };
   }
   if (baseType === 'similar') {
-    invariant(assertion.value, 'Similarity assertion must have a string value');
+    invariant(renderedValue, 'Similarity assertion must have a string value');
     invariant(
-      typeof assertion.value === 'string',
+      typeof renderedValue === 'string',
       '"contains" assertion type must have a string value',
     );
-    return matchesSimilarity(assertion.value, output, assertion.threshold || 0.75, inverse);
+    return matchesSimilarity(renderedValue, output, assertion.threshold || 0.75, inverse);
   }
   if (baseType === 'llm-rubric') {
-    invariant(assertion.value, 'Similarity assertion must have a string value');
+    invariant(renderedValue, 'Similarity assertion must have a string value');
     invariant(
-      typeof assertion.value === 'string',
+      typeof renderedValue === 'string',
       '"contains" assertion type must have a string value',
     );
-    return matchesLlmRubric(assertion.value, output, test.options);
+    return matchesLlmRubric(renderedValue, output, test.options);
   }
   if (baseType === 'webhook') {
-    invariant(assertion.value, '"webhook" assertion type must have a URL value');
-    invariant(
-      typeof assertion.value === 'string',
-      '"webhook" assertion type must have a URL value',
-    );
+    invariant(renderedValue, '"webhook" assertion type must have a URL value');
+    invariant(typeof renderedValue === 'string', '"webhook" assertion type must have a URL value');
     try {
       const context = {
         vars: test.vars || {},
       };
       const response = await fetchWithRetries(
-        assertion.value,
+        renderedValue,
         {
           method: 'POST',
           headers: {
@@ -339,8 +389,11 @@ ${assertion.value}`,
   }
   if (baseType === 'rouge-n') {
-    invariant(assertion.value, '"rouge" assertion type must a value (string or string array)');
-    return handleRougeScore(baseType, assertion, assertion.value, output, inverse);
+    invariant(
+      typeof renderedValue === 'string' || Array.isArray(renderedValue),
+      '"rouge" assertion type must be a value (string or string array)',
+    );
+    return handleRougeScore(baseType, assertion, renderedValue, output, inverse);
   }
   throw new Error('Unknown assertion type: ' + assertion.type);

package/src/evaluator.ts CHANGED Viewed

@@ -255,10 +255,11 @@ class Evaluator {
     }
     // Aggregate all vars across test cases
-    const tests = (
+    let tests = (
       testSuite.tests && testSuite.tests.length > 0
         ? testSuite.tests
+        : testSuite.scenarios
+        ? []
         : [
             {
               // Dummy test for cases when we're only comparing raw prompts.
@@ -269,6 +270,35 @@ class Evaluator {
       return Object.assign(finalTestCase, test);
     });
+    // Build scenarios and add to tests
+    if (testSuite.scenarios && testSuite.scenarios.length > 0) {
+      for (const scenario of testSuite.scenarios) {
+        for (const data of scenario.config) {
+          // Merge defaultTest with scenario config
+          const scenarioTests = (
+            scenario.tests || [
+              {
+                // Dummy test for cases when we're only comparing raw prompts.
+              },
+            ]
+          ).map((test) => {
+            return {
+              ...testSuite.defaultTest,
+              ...data,
+              ...test,
+              vars: {
+                ...testSuite.defaultTest?.vars,
+                ...data.vars,
+                ...test.vars,
+              },
+            };
+          });
+          // Add scenario tests to tests
+          tests = tests.concat(scenarioTests);
+        }
+      }
+    }
     const varNames: Set<string> = new Set();
     const varsWithSpecialColsRemoved: Record<string, string | string[] | object>[] = [];
     for (const testCase of tests) {
@@ -352,8 +382,7 @@ class Evaluator {
     // Set up progress bar...
     let progressbar: SingleBar | undefined;
     if (options.showProgressBar) {
-      const totalNumRuns =
-        testSuite.prompts.length * testSuite.providers.length * (totalVarCombinations || 1);
+      const totalNumRuns = runEvalOptions.length;
       const cliProgress = await import('cli-progress');
       progressbar = new cliProgress.SingleBar(
         {

package/src/index.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import providers from './providers';
 import telemetry from './telemetry';
 import { evaluate as doEvaluate } from './evaluator';
 import { loadApiProviders } from './providers';
-import { readTests } from './util';
+import { readTests, writeOutput } from './util';
 import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
 export * from './types';
@@ -28,6 +28,11 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
   };
   telemetry.maybeShowNotice();
   const ret = await doEvaluate(constructedTestSuite, options);
+  if (testSuite.outputPath) {
+    writeOutput(testSuite.outputPath, ret, testSuite, null);
+  }
   await telemetry.send();
   return ret;
 }

package/src/main.ts CHANGED Viewed

@@ -281,6 +281,7 @@ async function main() {
         prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
         providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
         tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
+        scenarios: fileConfig.scenarios || defaultConfig.scenarios,
         sharing:
           process.env.PROMPTFOO_DISABLE_SHARING === '1'
             ? false
@@ -310,6 +311,18 @@ async function main() {
         config.tests,
         cmdObj.tests ? undefined : basePath,
       );
+      //parse testCases for each scenario
+      if (fileConfig.scenarios) {
+        for (const scenario of fileConfig.scenarios) {
+          const parsedScenarioTests: TestCase[] = await readTests(
+            scenario.tests,
+            cmdObj.tests ? undefined : basePath,
+          );
+          scenario.tests = parsedScenarioTests;
+        }
+      }
       const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);
       if (parsedPrompts.length === 0) {
@@ -334,6 +347,7 @@ async function main() {
         providers: parsedProviders,
         providerPromptMap: parsedProviderPromptMap,
         tests: parsedTests,
+        scenarios: config.scenarios,
         defaultTest,
       };

package/src/providers/llama.ts ADDED Viewed

@@ -0,0 +1,95 @@
+import { fetchJsonWithCache } from '../cache';
+import { REQUEST_TIMEOUT_MS } from './shared';
+import type { ApiProvider, ProviderResponse } from '../types.js';
+interface LlamaCompletionOptions {
+  n_predict?: number;
+  temperature?: number;
+  top_k?: number;
+  top_p?: number;
+  n_keep?: number;
+  stop?: string[];
+  repeat_penalty?: number;
+  repeat_last_n?: number;
+  penalize_nl?: boolean;
+  presence_penalty?: number;
+  frequency_penalty?: number;
+  mirostat?: boolean;
+  mirostat_tau?: number;
+  mirostat_eta?: number;
+  seed?: number;
+  ignore_eos?: boolean;
+  logit_bias?: Record<string, number>;
+}
+export class LlamaProvider implements ApiProvider {
+  modelName: string;
+  options?: LlamaCompletionOptions;
+  constructor(modelName: string, options?: LlamaCompletionOptions) {
+    this.modelName = modelName;
+    this.options = options;
+  }
+  id(): string {
+    return `llama:${this.modelName}`;
+  }
+  toString(): string {
+    return `[Llama Provider ${this.modelName}]`;
+  }
+  async callApi(prompt: string, options?: LlamaCompletionOptions): Promise<ProviderResponse> {
+    options = Object.assign({}, this.options, options);
+    const body = {
+      prompt,
+      n_predict: options?.n_predict || 512,
+      temperature: options?.temperature,
+      top_k: options?.top_k,
+      top_p: options?.top_p,
+      n_keep: options?.n_keep,
+      stop: options?.stop,
+      repeat_penalty: options?.repeat_penalty,
+      repeat_last_n: options?.repeat_last_n,
+      penalize_nl: options?.penalize_nl,
+      presence_penalty: options?.presence_penalty,
+      frequency_penalty: options?.frequency_penalty,
+      mirostat: options?.mirostat,
+      mirostat_tau: options?.mirostat_tau,
+      mirostat_eta: options?.mirostat_eta,
+      seed: options?.seed,
+      ignore_eos: options?.ignore_eos,
+      logit_bias: options?.logit_bias,
+    };
+    let response;
+    try {
+      response = await fetchJsonWithCache(
+        `${process.env.LLAMA_BASE_URL || 'http://localhost:8080'}/completion`,
+        {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+          },
+          body: JSON.stringify(body),
+        },
+        REQUEST_TIMEOUT_MS,
+      );
+    } catch (err) {
+      return {
+        error: `API call error: ${String(err)}`,
+      };
+    }
+    try {
+      return {
+        output: response.data.content,
+      };
+    } catch (err) {
+      return {
+        error: `API response error: ${String(err)}: ${JSON.stringify(response.data)}`,
+      };
+    }
+  }
+}

package/src/providers.ts CHANGED Viewed

@@ -1,28 +1,47 @@
 import path from 'path';
-import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './types';
 import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
 import { AnthropicCompletionProvider } from './providers/anthropic';
 import { ReplicateProvider } from './providers/replicate';
 import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
+import { LlamaProvider } from './providers/llama';
 import { ScriptCompletionProvider } from './providers/scriptCompletion';
 import {
   AzureOpenAiChatCompletionProvider,
   AzureOpenAiCompletionProvider,
 } from './providers/azureopenai';
+import type {
+  ApiProvider,
+  ProviderConfig,
+  ProviderFunction,
+  ProviderId,
+  RawProviderConfig,
+} from './types';
 export async function loadApiProviders(
-  providerPaths: ProviderId | ProviderId[] | RawProviderConfig[],
+  providerPaths: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction,
   basePath?: string,
 ): Promise<ApiProvider[]> {
   if (typeof providerPaths === 'string') {
     return [await loadApiProvider(providerPaths, undefined, basePath)];
+  } else if (typeof providerPaths === 'function') {
+    return [
+      {
+        id: () => 'custom-function',
+        callApi: providerPaths,
+      },
+    ];
   } else if (Array.isArray(providerPaths)) {
     return Promise.all(
-      providerPaths.map((provider) => {
+      providerPaths.map((provider, idx) => {
         if (typeof provider === 'string') {
           return loadApiProvider(provider, undefined, basePath);
+        } else if (typeof provider === 'function') {
+          return {
+            id: () => `custom-function-${idx}`,
+            callApi: provider,
+          };
         } else {
           const id = Object.keys(provider)[0];
           const context = { ...provider[id], id };
@@ -115,7 +134,10 @@ export async function loadApiProvider(
     return new ReplicateProvider(modelName, undefined, context?.config);
   }
-  if (providerPath?.startsWith('localai:')) {
+  if (providerPath === 'llama' || providerPath.startsWith('llama:')) {
+    const modelName = providerPath.split(':')[1];
+    return new LlamaProvider(modelName, context?.config);
+  } else if (providerPath?.startsWith('localai:')) {
     const options = providerPath.split(':');
     const modelType = options[1];
     const modelName = options[2];

package/src/table.ts CHANGED Viewed

@@ -24,11 +24,11 @@ export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250
           text = text.slice(0, tableCellMaxLength) + '...';
         }
         if (pass) {
-          return chalk.green.bold('[PASS] ') + text;
+          return chalk.green('[PASS] ') + text;
         } else if (!pass) {
           // color everything red up until '---'
           return (
-            chalk.red.bold('[FAIL] ') +
+            chalk.red('[FAIL] ') +
             text
               .split('---')
               .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))

package/src/types.ts CHANGED Viewed

@@ -151,6 +151,7 @@ type BaseAssertionTypes =
   | 'is-json'
   | 'contains-json'
   | 'javascript'
+  | 'python'
   | 'similar'
   | 'llm-rubric'
   | 'webhook'
@@ -168,7 +169,10 @@ export interface Assertion {
   type: AssertionType;
   // The expected value, if applicable
-  value?: string | string[];
+  value?:
+    | string
+    | string[]
+    | ((output: string, testCase: AtomicTestCase, assertion: Assertion) => Promise<GradingResult>);
   // The threshold value, only applicable for similarity (cosine distance)
   threshold?: number;
@@ -188,9 +192,6 @@ export interface TestCase {
   // Key-value pairs to substitute in the prompt
   vars?: Record<string, string | string[] | object>;
-  // Optional filepath or glob pattern to load vars from
-  loadVars?: string | string[];
   // Optional list of automatic checks to run on the LLM output
   assert?: Assertion[];
@@ -198,6 +199,17 @@ export interface TestCase {
   options?: PromptConfig & OutputConfig & GradingConfig;
 }
+export interface Scenario {
+  // Optional description of what you're testing
+  description?: string;
+  // Default test case config
+  config: Partial<TestCase>[];
+  // Optional list of automatic checks to run on the LLM output
+  tests: TestCase[];
+}
 // Same as a TestCase, except the `vars` object has been flattened into its final form.
 export interface AtomicTestCase extends TestCase {
   vars?: Record<string, string | object>;
@@ -221,12 +233,17 @@ export interface TestSuite {
   // Test cases
   tests?: TestCase[];
+  // scenarios
+  scenarios?: Scenario[];
   // Default test case config
   defaultTest?: Partial<TestCase>;
 }
 export type ProviderId = string;
+export type ProviderFunction = (prompt: string) => Promise<ProviderResponse>;
 export type RawProviderConfig = Record<ProviderId, Omit<ProviderConfig, 'id'>>;
 // TestSuiteConfig = Test Suite, but before everything is parsed and resolved.  Providers are just strings, prompts are filepaths, tests can be filepath or inline.
@@ -235,7 +252,7 @@ export interface TestSuiteConfig {
   description?: string;
   // One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
-  providers: ProviderId | ProviderId[] | RawProviderConfig[];
+  providers: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction;
   // One or more prompt files to load
   prompts: string | string[];
@@ -243,6 +260,9 @@ export interface TestSuiteConfig {
   // Path to a test file, OR list of LLM prompt variations (aka "test case")
   tests: string | string[] | TestCase[];
+  // Scenarios, groupings of data and tests to be evaluated
+  scenarios?: Scenario[];
   // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
   defaultTest?: Omit<TestCase, 'description'>;