npm - promptfoo - Versions diffs - 0.17.9 → 0.18.0 - Mend

promptfoo 0.17.9 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/dist/package.json +1 -1
package/dist/src/assertions.d.ts.map +1 -1
package/dist/src/assertions.js +97 -42
package/dist/src/assertions.js.map +1 -1
package/dist/src/evaluator.d.ts.map +1 -1
package/dist/src/evaluator.js +35 -7
package/dist/src/evaluator.js.map +1 -1
package/dist/src/index.d.ts.map +1 -1
package/dist/src/index.js +3 -0
package/dist/src/index.js.map +1 -1
package/dist/src/main.js +9 -0
package/dist/src/main.js.map +1 -1
package/dist/src/providers.d.ts +2 -2
package/dist/src/providers.d.ts.map +1 -1
package/dist/src/providers.js +15 -1
package/dist/src/providers.js.map +1 -1
package/dist/src/table.js +2 -2
package/dist/src/table.js.map +1 -1
package/dist/src/types.d.ts +11 -4
package/dist/src/types.d.ts.map +1 -1
package/dist/src/util.d.ts.map +1 -1
package/dist/src/util.js +5 -2
package/dist/src/util.js.map +1 -1
package/package.json +1 -1
package/src/assertions.ts +102 -49
package/src/evaluator.ts +33 -4
package/src/index.ts +6 -1
package/src/main.ts +14 -0
package/src/providers.ts +22 -4
package/src/table.ts +2 -2
package/src/types.ts +25 -5
package/src/util.ts +12 -2

package/src/index.ts CHANGED

@@ -3,7 +3,7 @@ import providers from './providers';
 import telemetry from './telemetry';
 import { evaluate as doEvaluate } from './evaluator';
 import { loadApiProviders } from './providers';
-import { readTests } from './util';
+import { readTests, writeOutput } from './util';
 import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
 export * from './types';
@@ -28,6 +28,11 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
   };
   telemetry.maybeShowNotice();
   const ret = await doEvaluate(constructedTestSuite, options);
+  if (testSuite.outputPath) {
+    writeOutput(testSuite.outputPath, ret, testSuite, null);
+  }
   await telemetry.send();
   return ret;
 }

package/src/main.ts CHANGED

@@ -281,6 +281,7 @@ async function main() {
         prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
         providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
         tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
+        scenarios: fileConfig.scenarios || defaultConfig.scenarios,
         sharing:
           process.env.PROMPTFOO_DISABLE_SHARING === '1'
             ? false
@@ -310,6 +311,18 @@ async function main() {
         config.tests,
         cmdObj.tests ? undefined : basePath,
       );
+      //parse testCases for each scenario
+      if (fileConfig.scenarios) {
+        for (const scenario of fileConfig.scenarios) {
+          const parsedScenarioTests: TestCase[] = await readTests(
+            scenario.tests,
+            cmdObj.tests ? undefined : basePath,
+          );
+          scenario.tests = parsedScenarioTests;
+        }
+      }
       const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);
       if (parsedPrompts.length === 0) {
@@ -334,6 +347,7 @@ async function main() {
         providers: parsedProviders,
         providerPromptMap: parsedProviderPromptMap,
         tests: parsedTests,
+        scenarios: config.scenarios,
         defaultTest,
       };

package/src/providers.ts CHANGED

@@ -1,7 +1,5 @@
 import path from 'path';
-import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './types';
 import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
 import { AnthropicCompletionProvider } from './providers/anthropic';
 import { ReplicateProvider } from './providers/replicate';
@@ -12,17 +10,37 @@ import {
   AzureOpenAiCompletionProvider,
 } from './providers/azureopenai';
+import type {
+  ApiProvider,
+  ProviderConfig,
+  ProviderFunction,
+  ProviderId,
+  RawProviderConfig,
+} from './types';
 export async function loadApiProviders(
-  providerPaths: ProviderId | ProviderId[] | RawProviderConfig[],
+  providerPaths: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction,
   basePath?: string,
 ): Promise<ApiProvider[]> {
   if (typeof providerPaths === 'string') {
     return [await loadApiProvider(providerPaths, undefined, basePath)];
+  } else if (typeof providerPaths === 'function') {
+    return [
+      {
+        id: () => 'custom-function',
+        callApi: providerPaths,
+      },
+    ];
   } else if (Array.isArray(providerPaths)) {
     return Promise.all(
-      providerPaths.map((provider) => {
+      providerPaths.map((provider, idx) => {
         if (typeof provider === 'string') {
           return loadApiProvider(provider, undefined, basePath);
+        } else if (typeof provider === 'function') {
+          return {
+            id: () => `custom-function-${idx}`,
+            callApi: provider,
+          };
         } else {
           const id = Object.keys(provider)[0];
           const context = { ...provider[id], id };

package/src/table.ts CHANGED

@@ -24,11 +24,11 @@ export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250
           text = text.slice(0, tableCellMaxLength) + '...';
         }
         if (pass) {
-          return chalk.green.bold('[PASS] ') + text;
+          return chalk.green('[PASS] ') + text;
         } else if (!pass) {
           // color everything red up until '---'
           return (
-            chalk.red.bold('[FAIL] ') +
+            chalk.red('[FAIL] ') +
             text
               .split('---')
               .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))

package/src/types.ts CHANGED

@@ -151,6 +151,7 @@ type BaseAssertionTypes =
   | 'is-json'
   | 'contains-json'
   | 'javascript'
+  | 'python'
   | 'similar'
   | 'llm-rubric'
   | 'webhook'
@@ -168,7 +169,10 @@ export interface Assertion {
   type: AssertionType;
   // The expected value, if applicable
-  value?: string | string[];
+  value?:
+    | string
+    | string[]
+    | ((output: string, testCase: AtomicTestCase, assertion: Assertion) => Promise<GradingResult>);
   // The threshold value, only applicable for similarity (cosine distance)
   threshold?: number;
@@ -188,9 +192,6 @@ export interface TestCase {
   // Key-value pairs to substitute in the prompt
   vars?: Record<string, string | string[] | object>;
-  // Optional filepath or glob pattern to load vars from
-  loadVars?: string | string[];
   // Optional list of automatic checks to run on the LLM output
   assert?: Assertion[];
@@ -198,6 +199,17 @@ export interface TestCase {
   options?: PromptConfig & OutputConfig & GradingConfig;
 }
+export interface Scenario {
+  // Optional description of what you're testing
+  description?: string;
+  // Default test case config
+  config: Partial<TestCase>[];
+  // Optional list of automatic checks to run on the LLM output
+  tests: TestCase[];
+}
 // Same as a TestCase, except the `vars` object has been flattened into its final form.
 export interface AtomicTestCase extends TestCase {
   vars?: Record<string, string | object>;
@@ -221,12 +233,17 @@ export interface TestSuite {
   // Test cases
   tests?: TestCase[];
+  // scenarios
+  scenarios?: Scenario[];
   // Default test case config
   defaultTest?: Partial<TestCase>;
 }
 export type ProviderId = string;
+export type ProviderFunction = (prompt: string) => Promise<ProviderResponse>;
 export type RawProviderConfig = Record<ProviderId, Omit<ProviderConfig, 'id'>>;
 // TestSuiteConfig = Test Suite, but before everything is parsed and resolved.  Providers are just strings, prompts are filepaths, tests can be filepath or inline.
@@ -235,7 +252,7 @@ export interface TestSuiteConfig {
   description?: string;
   // One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
-  providers: ProviderId | ProviderId[] | RawProviderConfig[];
+  providers: ProviderId | ProviderId[] | RawProviderConfig[] | ProviderFunction;
   // One or more prompt files to load
   prompts: string | string[];
@@ -243,6 +260,9 @@ export interface TestSuiteConfig {
   // Path to a test file, OR list of LLM prompt variations (aka "test case")
   tests: string | string[] | TestCase[];
+  // Scenarios, groupings of data and tests to be evaluated
+  scenarios?: Scenario[];
   // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
   defaultTest?: Omit<TestCase, 'description'>;

package/src/util.ts CHANGED

@@ -4,6 +4,7 @@ import * as os from 'os';
 import $RefParser from '@apidevtools/json-schema-ref-parser';
 import fetch from 'node-fetch';
+import invariant from 'tiny-invariant';
 import yaml from 'js-yaml';
 import nunjucks from 'nunjucks';
 import { globSync } from 'glob';
@@ -44,6 +45,15 @@ export function readProviderPromptMap(
     allPrompts.push(prompt.display);
   }
+  invariant(
+    typeof config.providers !== 'string',
+    'In order to use a provider-prompt map, config.providers should be an array of objects, not a string',
+  );
+  invariant(
+    typeof config.providers !== 'function',
+    'In order to use a provider-prompt map, config.providers should be an array of objects, not a function',
+  );
   for (const provider of config.providers) {
     if (typeof provider === 'object') {
       const rawProvider = provider as RawProviderConfig;
@@ -446,7 +456,7 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
         2,
       ),
     );
-    if (fs.existsSync(latestResultsPath)) {
+    if (fs.existsSync(latestResultsPath) || fs.lstatSync(latestResultsPath).isSymbolicLink()) {
       fs.unlinkSync(latestResultsPath);
     }
     fs.symlinkSync(newResultsPath, latestResultsPath);
@@ -463,7 +473,7 @@ export function listPreviousResults(): string[] {
   const sortedFiles = resultsFiles.sort((a, b) => {
     const statA = fs.statSync(path.join(directory, a));
     const statB = fs.statSync(path.join(directory, b));
-    return statB.birthtime.getTime() - statA.birthtime.getTime(); // sort in descending order
+    return statA.birthtime.getTime() - statB.birthtime.getTime(); // sort in ascending order
   });
   return sortedFiles;
 }