promptfoo 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +50 -40
  2. package/dist/assertions.d.ts +2 -2
  3. package/dist/assertions.d.ts.map +1 -1
  4. package/dist/assertions.js +186 -44
  5. package/dist/assertions.js.map +1 -1
  6. package/dist/cache.js +9 -9
  7. package/dist/cache.js.map +1 -1
  8. package/dist/evaluator.d.ts +1 -1
  9. package/dist/evaluator.d.ts.map +1 -1
  10. package/dist/evaluator.js +30 -23
  11. package/dist/evaluator.js.map +1 -1
  12. package/dist/index.d.ts +10 -10
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +18 -14
  15. package/dist/index.js.map +1 -1
  16. package/dist/main.js +49 -44
  17. package/dist/main.js.map +1 -1
  18. package/dist/providers/localai.js +11 -11
  19. package/dist/providers/localai.js.map +1 -1
  20. package/dist/providers/openai.d.ts.map +1 -1
  21. package/dist/providers/openai.js +30 -21
  22. package/dist/providers/openai.js.map +1 -1
  23. package/dist/providers.d.ts +3 -3
  24. package/dist/providers.d.ts.map +1 -1
  25. package/dist/providers.js +15 -15
  26. package/dist/providers.js.map +1 -1
  27. package/dist/types.d.ts +7 -3
  28. package/dist/types.d.ts.map +1 -1
  29. package/dist/util.d.ts +4 -4
  30. package/dist/util.d.ts.map +1 -1
  31. package/dist/util.js +49 -18
  32. package/dist/util.js.map +1 -1
  33. package/dist/web/client/assets/index-15dfcd18.js +172 -0
  34. package/dist/web/client/assets/index-87905193.css +1 -0
  35. package/dist/web/client/index.html +2 -2
  36. package/dist/web/server.js +9 -9
  37. package/dist/web/server.js.map +1 -1
  38. package/package.json +3 -1
  39. package/src/assertions.ts +249 -38
  40. package/src/cache.ts +2 -2
  41. package/src/evaluator.ts +25 -18
  42. package/src/index.ts +13 -8
  43. package/src/main.ts +28 -15
  44. package/src/providers/localai.ts +3 -3
  45. package/src/providers/openai.ts +16 -8
  46. package/src/providers.ts +3 -3
  47. package/src/types.ts +24 -3
  48. package/src/util.ts +48 -17
  49. package/src/web/client/package-lock.json +5729 -0
  50. package/src/web/client/src/ResultsTable.css +35 -4
  51. package/src/web/client/src/ResultsTable.tsx +150 -70
  52. package/src/web/client/src/ResultsView.tsx +83 -18
  53. package/src/web/client/src/index.css +6 -0
  54. package/src/web/client/src/types.ts +2 -0
  55. package/src/web/server.ts +3 -3
  56. package/dist/web/client/assets/index-207192fc.css +0 -1
  57. package/dist/web/client/assets/index-8751749f.js +0 -172
package/src/main.ts CHANGED
@@ -6,9 +6,9 @@ import Table from 'cli-table3';
6
6
  import chalk from 'chalk';
7
7
  import { Command } from 'commander';
8
8
 
9
- import logger, { setLogLevel } from './logger.js';
10
- import { loadApiProvider, loadApiProviders } from './providers.js';
11
- import { evaluate } from './evaluator.js';
9
+ import logger, { setLogLevel } from './logger';
10
+ import { loadApiProvider, loadApiProviders } from './providers';
11
+ import { evaluate } from './evaluator';
12
12
  import {
13
13
  maybeReadConfig,
14
14
  readConfig,
@@ -16,10 +16,10 @@ import {
16
16
  readTests,
17
17
  writeLatestResults,
18
18
  writeOutput,
19
- } from './util.js';
20
- import { getDirectory } from './esm.js';
21
- import { init } from './web/server.js';
22
- import { disableCache } from './cache.js';
19
+ } from './util';
20
+ import { getDirectory } from './esm';
21
+ import { init } from './web/server';
22
+ import { disableCache } from './cache';
23
23
 
24
24
  import type {
25
25
  CommandLineOptions,
@@ -27,8 +27,8 @@ import type {
27
27
  TestCase,
28
28
  TestSuite,
29
29
  UnifiedConfig,
30
- } from './types.js';
31
- import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding.js';
30
+ } from './types';
31
+ import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
32
32
 
33
33
  function createDummyFiles(directory: string | null) {
34
34
  if (directory) {
@@ -68,7 +68,7 @@ async function main() {
68
68
  ];
69
69
  let config: Partial<UnifiedConfig> = {};
70
70
  for (const path of potentialPaths) {
71
- const maybeConfig = maybeReadConfig(path);
71
+ const maybeConfig = await maybeReadConfig(path);
72
72
  if (maybeConfig) {
73
73
  config = maybeConfig;
74
74
  break;
@@ -154,8 +154,16 @@ async function main() {
154
154
  'This suffix is append to every prompt',
155
155
  config.defaultTest?.options?.suffix,
156
156
  )
157
- .option('--no-write', 'Do not write results to promptfoo directory')
158
- .option('--no-cache', 'Do not read or write results to disk cache')
157
+ .option(
158
+ '--no-write',
159
+ 'Do not write results to promptfoo directory',
160
+ config?.commandLineOptions?.write,
161
+ )
162
+ .option(
163
+ '--no-cache',
164
+ 'Do not read or write results to disk cache',
165
+ config?.commandLineOptions?.cache,
166
+ )
159
167
  .option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
160
168
  .option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
161
169
  .option('--view [port]', 'View in browser ui')
@@ -172,12 +180,13 @@ async function main() {
172
180
  const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
173
181
  const configPath = cmdObj.config;
174
182
  if (configPath) {
175
- config = readConfig(configPath);
183
+ config = await readConfig(configPath);
176
184
  } else {
177
185
  config = {
178
186
  prompts: cmdObj.prompts || config.prompts,
179
187
  providers: cmdObj.providers || config.providers,
180
188
  tests: cmdObj.tests || cmdObj.vars || config.tests,
189
+ defaultTest: config.defaultTest,
181
190
  };
182
191
  }
183
192
 
@@ -255,8 +264,9 @@ async function main() {
255
264
  },
256
265
  });
257
266
  // Skip first row (header) and add the rest. Color PASS/FAIL
258
- for (const row of summary.table.body) {
267
+ for (const row of summary.table.body.slice(0, 25)) {
259
268
  table.push([
269
+ ...row.vars,
260
270
  ...row.outputs.map((col) => {
261
271
  const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
262
272
  if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
@@ -274,11 +284,14 @@ async function main() {
274
284
  }
275
285
  return col;
276
286
  }),
277
- ...row.vars,
278
287
  ]);
279
288
  }
280
289
 
281
290
  logger.info('\n' + table.toString());
291
+ if (summary.table.body.length > 25) {
292
+ const rowsLeft = summary.table.body.length - 25;
293
+ logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
294
+ }
282
295
  }
283
296
  if (cmdObj.view || !cmdObj.write) {
284
297
  logger.info('Evaluation complete');
package/src/providers/localai.ts CHANGED
@@ -1,6 +1,6 @@
1
- import logger from '../logger.js';
2
- import { fetchJsonWithCache } from '../cache.js';
3
- import { REQUEST_TIMEOUT_MS } from './shared.js';
1
+ import logger from '../logger';
2
+ import { fetchJsonWithCache } from '../cache';
3
+ import { REQUEST_TIMEOUT_MS } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderResponse } from '../types.js';
6
6
 
package/src/providers/openai.ts CHANGED
@@ -1,6 +1,6 @@
1
- import logger from '../logger.js';
2
- import { fetchJsonWithCache } from '../cache.js';
3
- import { REQUEST_TIMEOUT_MS } from './shared.js';
1
+ import logger from '../logger';
2
+ import { fetchJsonWithCache } from '../cache';
3
+ import { REQUEST_TIMEOUT_MS } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
6
6
 
@@ -126,12 +126,20 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
126
126
  );
127
127
  }
128
128
 
129
+ let stop: string;
130
+ try {
131
+ stop = process.env.OPENAI_STOP
132
+ ? JSON.parse(process.env.OPENAI_STOP)
133
+ : ['<|im_end|>', '<|endoftext|>'];
134
+ } catch (err) {
135
+ throw new Error(`OPENAI_STOP is not a valid JSON string: ${err}`);
136
+ }
129
137
  const body = {
130
138
  model: this.modelName,
131
139
  prompt,
132
- max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
133
- temperature: options?.temperature ?? (process.env.OPENAI_MAX_TEMPERATURE || 0),
134
- stop: process.env.OPENAI_STOP ? JSON.parse(process.env.OPENAI_STOP) : undefined,
140
+ max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
141
+ temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
142
+ stop,
135
143
  };
136
144
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
137
145
  let data,
@@ -210,8 +218,8 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
210
218
  const body = {
211
219
  model: this.modelName,
212
220
  messages: messages,
213
- max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
214
- temperature: options?.temperature ?? (process.env.OPENAI_MAX_TEMPERATURE || 0),
221
+ max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
222
+ temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
215
223
  };
216
224
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
217
225
 
package/src/providers.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  import path from 'node:path';
2
2
 
3
- import { ApiProvider } from './types.js';
3
+ import { ApiProvider } from './types';
4
4
 
5
- import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai.js';
6
- import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai.js';
5
+ import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
6
+ import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
7
7
 
8
8
  export async function loadApiProviders(providerPaths: string | string[]): Promise<ApiProvider[]> {
9
9
  if (typeof providerPaths === 'string') {
package/src/types.ts CHANGED
@@ -113,13 +113,34 @@ export interface GradingResult {
113
113
  tokensUsed?: TokenUsage;
114
114
  }
115
115
 
116
+ type BaseAssertionTypes =
117
+ | 'equals'
118
+ | 'contains'
119
+ | 'icontains'
120
+ | 'contains-all'
121
+ | 'contains-any'
122
+ | 'regex'
123
+ | 'is-json'
124
+ | 'contains-json'
125
+ | 'javascript'
126
+ | 'similar'
127
+ | 'llm-rubric'
128
+ | 'webhook'
129
+ | 'rouge-n'
130
+ | 'rouge-s'
131
+ | 'rouge-l';
132
+
133
+ type NotPrefixed<T extends string> = `not-${T}`;
134
+
135
+ export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
136
+
116
137
  // TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
117
138
  export interface Assertion {
118
139
  // Type of assertion
119
- type: 'equals' | 'is-json' | 'contains-json' | 'javascript' | 'similar' | 'llm-rubric';
140
+ type: AssertionType;
120
141
 
121
142
  // The expected value, if applicable
122
- value?: string;
143
+ value?: string | string[];
123
144
 
124
145
  // The threshold value, only applicable for similarity (cosine distance)
125
146
  threshold?: number;
@@ -157,7 +178,7 @@ export interface TestSuite {
157
178
  providers: ApiProvider[];
158
179
 
159
180
  // One or more prompt strings
160
- prompts: string[];
181
+ prompts: Prompt[];
161
182
 
162
183
  // Test cases
163
184
  tests?: TestCase[];
package/src/util.ts CHANGED
@@ -2,6 +2,7 @@ import * as fs from 'fs';
2
2
  import * as path from 'node:path';
3
3
  import * as os from 'node:os';
4
4
 
5
+ import $RefParser from '@apidevtools/json-schema-ref-parser';
5
6
  import fetch from 'node-fetch';
6
7
  import yaml from 'js-yaml';
7
8
  import nunjucks from 'nunjucks';
@@ -10,13 +11,13 @@ import { parse as parsePath } from 'path';
10
11
  import { parse as parseCsv } from 'csv-parse/sync';
11
12
  import { stringify } from 'csv-stringify/sync';
12
13
 
13
- import logger from './logger.js';
14
- import { getDirectory } from './esm.js';
14
+ import logger from './logger';
15
+ import { getDirectory } from './esm';
15
16
 
16
17
  import type { RequestInfo, RequestInit, Response } from 'node-fetch';
17
18
 
18
- import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase } from './types.js';
19
- import { assertionFromString } from './assertions.js';
19
+ import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase, Prompt } from './types';
20
+ import { assertionFromString } from './assertions';
20
21
 
21
22
  const PROMPT_DELIMITER = '---';
22
23
 
@@ -28,14 +29,14 @@ function parseJson(json: string): any | undefined {
28
29
  }
29
30
  }
30
31
 
31
- export function maybeReadConfig(configPath: string): UnifiedConfig | undefined {
32
+ export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
32
33
  if (!fs.existsSync(configPath)) {
33
34
  return undefined;
34
35
  }
35
36
  return readConfig(configPath);
36
37
  }
37
38
 
38
- export function readConfig(configPath: string): UnifiedConfig {
39
+ export async function readConfig(configPath: string): Promise<UnifiedConfig> {
39
40
  const ext = path.parse(configPath).ext;
40
41
  switch (ext) {
41
42
  case '.json':
@@ -44,17 +45,38 @@ export function readConfig(configPath: string): UnifiedConfig {
44
45
  case '.js':
45
46
  return require(configPath) as UnifiedConfig;
46
47
  case '.yaml':
47
- return yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
48
+ case '.yml':
49
+ let ret = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
50
+ ret = (await $RefParser.dereference(ret)) as UnifiedConfig;
51
+ return ret;
48
52
  default:
49
53
  throw new Error(`Unsupported configuration file format: ${ext}`);
50
54
  }
51
55
  }
52
56
 
53
- export function readPrompts(promptPathsOrGlobs: string | string[]): string[] {
54
- promptPathsOrGlobs =
55
- typeof promptPathsOrGlobs === 'string' ? [promptPathsOrGlobs] : promptPathsOrGlobs;
56
- const promptPaths = promptPathsOrGlobs.flatMap((pathOrGlob) => globSync(pathOrGlob));
57
- let promptContents: string[] = [];
57
+ enum PromptInputType {
58
+ STRING = 1,
59
+ ARRAY = 2,
60
+ NAMED = 3,
61
+ }
62
+
63
+ export function readPrompts(
64
+ promptPathOrGlobs: string | string[] | Record<string, string>,
65
+ ): Prompt[] {
66
+ let promptPaths: string[] = [];
67
+ let promptContents: Prompt[] = [];
68
+
69
+ let inputType: PromptInputType | undefined;
70
+ if (typeof promptPathOrGlobs === 'string') {
71
+ promptPaths = [promptPathOrGlobs];
72
+ inputType = PromptInputType.STRING;
73
+ } else if (Array.isArray(promptPathOrGlobs)) {
74
+ promptPaths = promptPathOrGlobs.flatMap((pathOrGlob) => globSync(pathOrGlob));
75
+ inputType = PromptInputType.ARRAY;
76
+ } else if (typeof promptPathOrGlobs === 'object') {
77
+ promptPaths = Object.keys(promptPathOrGlobs);
78
+ inputType = PromptInputType.NAMED;
79
+ }
58
80
 
59
81
  for (const promptPath of promptPaths) {
60
82
  const stat = fs.statSync(promptPath);
@@ -63,18 +85,27 @@ export function readPrompts(promptPathsOrGlobs: string | string[]): string[] {
63
85
  const fileContents = filesInDirectory.map((fileName) =>
64
86
  fs.readFileSync(path.join(promptPath, fileName), 'utf-8'),
65
87
  );
66
- promptContents.push(...fileContents);
88
+ promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
67
89
  } else {
68
90
  const fileContent = fs.readFileSync(promptPath, 'utf-8');
69
- promptContents.push(fileContent);
91
+ let display;
92
+ if (inputType === PromptInputType.NAMED) {
93
+ display = (promptPathOrGlobs as Record<string, string>)[promptPath];
94
+ } else {
95
+ display = fileContent.length > 200 ? promptPath : fileContent;
96
+ }
97
+ promptContents.push({ raw: fileContent, display });
70
98
  }
71
99
  }
72
100
 
73
- if (promptContents.length === 1) {
74
- promptContents = promptContents[0].split(PROMPT_DELIMITER).map((p) => p.trim());
101
+ if (promptContents.length === 1 && inputType !== PromptInputType.NAMED) {
102
+ const content = promptContents[0].raw;
103
+ promptContents = content
104
+ .split(PROMPT_DELIMITER)
105
+ .map((p) => ({ raw: p.trim(), display: p.trim() }));
75
106
  }
76
107
  if (promptContents.length === 0) {
77
- throw new Error(`There are no prompts in ${promptPathsOrGlobs.join(', ')}`);
108
+ throw new Error(`There are no prompts in ${JSON.stringify(promptPathOrGlobs)}`);
78
109
  }
79
110
  return promptContents;
80
111
  }