promptfoo 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/dist/package.json +2 -2
  2. package/dist/src/cache.js +1 -1
  3. package/dist/src/cache.js.map +1 -1
  4. package/dist/src/evaluator.d.ts.map +1 -1
  5. package/dist/src/evaluator.js +26 -25
  6. package/dist/src/evaluator.js.map +1 -1
  7. package/dist/src/index.d.ts +4 -0
  8. package/dist/src/index.d.ts.map +1 -1
  9. package/dist/src/index.js +6 -5
  10. package/dist/src/index.js.map +1 -1
  11. package/dist/src/logger.d.ts +3 -2
  12. package/dist/src/logger.d.ts.map +1 -1
  13. package/dist/src/logger.js +13 -5
  14. package/dist/src/logger.js.map +1 -1
  15. package/dist/src/main.js +5 -40
  16. package/dist/src/main.js.map +1 -1
  17. package/dist/src/onboarding.d.ts +1 -1
  18. package/dist/src/onboarding.d.ts.map +1 -1
  19. package/dist/src/onboarding.js +6 -0
  20. package/dist/src/onboarding.js.map +1 -1
  21. package/dist/src/providers/openai.d.ts +11 -3
  22. package/dist/src/providers/openai.d.ts.map +1 -1
  23. package/dist/src/providers/openai.js +17 -5
  24. package/dist/src/providers/openai.js.map +1 -1
  25. package/dist/src/providers.d.ts +3 -3
  26. package/dist/src/providers.d.ts.map +1 -1
  27. package/dist/src/providers.js +16 -7
  28. package/dist/src/providers.js.map +1 -1
  29. package/dist/src/table.d.ts +4 -0
  30. package/dist/src/table.d.ts.map +1 -0
  31. package/dist/src/table.js +48 -0
  32. package/dist/src/table.js.map +1 -0
  33. package/dist/src/types.d.ts +7 -1
  34. package/dist/src/types.d.ts.map +1 -1
  35. package/dist/src/util.d.ts.map +1 -1
  36. package/dist/src/util.js +9 -0
  37. package/dist/src/util.js.map +1 -1
  38. package/dist/src/web/client/assets/{index-eb6d3769.js → index-0f6d6b29.js} +13 -13
  39. package/dist/src/web/client/index.html +1 -1
  40. package/package.json +2 -2
  41. package/src/cache.ts +1 -1
  42. package/src/evaluator.ts +34 -33
  43. package/src/index.ts +3 -6
  44. package/src/logger.ts +13 -5
  45. package/src/main.ts +6 -41
  46. package/src/onboarding.ts +6 -0
  47. package/src/providers/openai.ts +32 -6
  48. package/src/providers.ts +32 -9
  49. package/src/table.ts +41 -0
  50. package/src/types.ts +10 -1
  51. package/src/util.ts +12 -1
  52. package/src/web/client/src/App.tsx +5 -1
  53. package/src/web/client/src/ResultsView.tsx +12 -10
  54. package/src/web/client/package-lock.json +0 -5726
  55. package/dist/{tableOutput.html → src/tableOutput.html} +0 -0
package/dist/src/web/client/index.html CHANGED
@@ -5,7 +5,7 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-eb6d3769.js"></script>
8
+ <script type="module" crossorigin src="/assets/index-0f6d6b29.js"></script>
9
9
  <link rel="stylesheet" href="/assets/index-87905193.css">
10
10
  </head>
11
11
  <body>
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "Prompt engineering toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.12.0",
5
+ "version": "0.13.0",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
@@ -30,7 +30,7 @@
30
30
  "build:clean": "rm -rf dist",
31
31
  "build:client": "cd src/web/client && npm run build && cp -r dist/ ../../../dist/src/web/client",
32
32
  "build:watch": "tsc --watch",
33
- "build": "tsc && cp src/*.html dist/ && npm run build:client && chmod +x dist/src/main.js",
33
+ "build": "tsc && cp src/*.html dist/src && npm run build:client && chmod +x dist/src/main.js",
34
34
  "prepare": "npm run install:client && npm run build:clean && npm run build",
35
35
  "test": "jest",
36
36
  "test:watch": "jest --watch",
package/src/cache.ts CHANGED
@@ -77,7 +77,7 @@ export async function fetchJsonWithCache(
77
77
  try {
78
78
  const data = await response.json();
79
79
  if (response.ok) {
80
- logger.debug(`Storing ${url} response in cache: ${data}`);
80
+ logger.debug(`Storing ${url} response in cache: ${JSON.stringify(data)}`);
81
81
  await cache.set(cacheKey, JSON.stringify(data));
82
82
  }
83
83
  return {
package/src/evaluator.ts CHANGED
@@ -237,41 +237,9 @@ class Evaluator {
237
237
  }
238
238
  }
239
239
 
240
- // Set up table...
241
- const isTest = tests.some((t) => !!t.assert);
242
-
243
- const table: EvaluateTable = {
244
- head: {
245
- prompts: prompts.map((p) => p.display),
246
- vars: Array.from(varNames).sort(),
247
- // TODO(ian): add assertions to table?
248
- },
249
- body: [],
250
- };
251
-
252
- // And progress bar...
253
- let progressbar: SingleBar | undefined;
254
- if (options.showProgressBar) {
255
- // FIXME(ian): Add var combinations too
256
- const totalNumRuns =
257
- testSuite.prompts.length * testSuite.providers.length * (tests.length || 1);
258
- const cliProgress = await import('cli-progress');
259
- progressbar = new cliProgress.SingleBar(
260
- {
261
- format:
262
- 'Eval: [{bar}] {percentage}% | ETA: {eta}s | {value}/{total} | {provider} "{prompt}" {vars}',
263
- },
264
- cliProgress.Presets.shades_classic,
265
- );
266
- progressbar.start(totalNumRuns, 0, {
267
- provider: '',
268
- prompt: '',
269
- vars: '',
270
- });
271
- }
272
-
273
240
  // Set up eval cases
274
241
  const runEvalOptions: RunEvalOptions[] = [];
242
+ let totalVarCombinations = 0;
275
243
  let rowIndex = 0;
276
244
  for (const testCase of tests) {
277
245
  // Handle default properties
@@ -287,6 +255,7 @@ class Evaluator {
287
255
 
288
256
  // Finalize test case eval
289
257
  const varCombinations = generateVarCombinations(testCase.vars || {});
258
+ totalVarCombinations += varCombinations.length;
290
259
  for (const vars of varCombinations) {
291
260
  let colIndex = 0;
292
261
  for (const prompt of testSuite.prompts) {
@@ -309,6 +278,38 @@ class Evaluator {
309
278
  }
310
279
  }
311
280
 
281
+ // Set up table...
282
+ const isTest = tests.some((t) => !!t.assert);
283
+
284
+ const table: EvaluateTable = {
285
+ head: {
286
+ prompts: prompts.map((p) => p.display),
287
+ vars: Array.from(varNames).sort(),
288
+ // TODO(ian): add assertions to table?
289
+ },
290
+ body: [],
291
+ };
292
+
293
+ // Set up progress bar...
294
+ let progressbar: SingleBar | undefined;
295
+ if (options.showProgressBar) {
296
+ const totalNumRuns =
297
+ testSuite.prompts.length * testSuite.providers.length * (totalVarCombinations || 1);
298
+ const cliProgress = await import('cli-progress');
299
+ progressbar = new cliProgress.SingleBar(
300
+ {
301
+ format:
302
+ 'Eval: [{bar}] {percentage}% | ETA: {eta}s | {value}/{total} | {provider} "{prompt}" {vars}',
303
+ },
304
+ cliProgress.Presets.shades_classic,
305
+ );
306
+ progressbar.start(totalNumRuns, 0, {
307
+ provider: '',
308
+ prompt: '',
309
+ vars: '',
310
+ });
311
+ }
312
+
312
313
  // Actually run the eval
313
314
  const results: EvaluateResult[] = [];
314
315
  await async.forEachOfLimit(
package/src/index.ts CHANGED
@@ -4,11 +4,12 @@ import telemetry from './telemetry';
4
4
  import { evaluate as doEvaluate } from './evaluator';
5
5
  import { loadApiProviders } from './providers';
6
6
  import { readTests } from './util';
7
-
8
7
  import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
9
8
 
10
9
  export * from './types';
11
10
 
11
+ export { generateTable } from './table';
12
+
12
13
  interface EvaluateTestSuite extends TestSuiteConfig {
13
14
  prompts: string[];
14
15
  }
@@ -30,11 +31,7 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
30
31
  return ret;
31
32
  }
32
33
 
33
- module.exports = {
34
- evaluate,
35
- assertions,
36
- providers,
37
- };
34
+ export { evaluate, assertions, providers };
38
35
 
39
36
  export default {
40
37
  evaluate,
package/src/logger.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import chalk from 'chalk';
2
2
  import winston from 'winston';
3
3
 
4
- const logLevels = {
4
+ export const LOG_LEVELS = {
5
5
  error: 0,
6
6
  warn: 1,
7
7
  info: 2,
@@ -22,13 +22,21 @@ const customFormatter = winston.format.printf(({ level, message, ...args }) => {
22
22
  });
23
23
 
24
24
  const logger = winston.createLogger({
25
- levels: logLevels,
25
+ levels: LOG_LEVELS,
26
26
  format: winston.format.combine(winston.format.simple(), customFormatter),
27
- transports: [new winston.transports.Console()],
27
+ transports: [
28
+ new winston.transports.Console({
29
+ level: process.env.LOG_LEVEL || 'info',
30
+ }),
31
+ ],
28
32
  });
29
33
 
30
- export function setLogLevel(level: keyof typeof logLevels) {
31
- if (logLevels.hasOwnProperty(level)) {
34
+ export function getLogLevel() {
35
+ return logger.transports[0].level;
36
+ }
37
+
38
+ export function setLogLevel(level: keyof typeof LOG_LEVELS) {
39
+ if (LOG_LEVELS.hasOwnProperty(level)) {
32
40
  logger.transports[0].level = level;
33
41
  } else {
34
42
  throw new Error(`Invalid log level: ${level}`);
package/src/main.ts CHANGED
@@ -2,12 +2,11 @@
2
2
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
3
3
  import { join as pathJoin } from 'path';
4
4
 
5
- import Table from 'cli-table3';
6
5
  import chalk from 'chalk';
7
6
  import { Command } from 'commander';
8
7
 
9
8
  import telemetry from './telemetry';
10
- import logger, { setLogLevel } from './logger';
9
+ import logger, { getLogLevel, setLogLevel } from './logger';
11
10
  import { loadApiProvider, loadApiProviders } from './providers';
12
11
  import { evaluate } from './evaluator';
13
12
  import {
@@ -31,6 +30,7 @@ import type {
31
30
  TestSuite,
32
31
  UnifiedConfig,
33
32
  } from './types';
33
+ import { generateTable } from './table';
34
34
 
35
35
  function createDummyFiles(directory: string | null) {
36
36
  if (directory) {
@@ -123,10 +123,9 @@ async function main() {
123
123
  .command('eval')
124
124
  .description('Evaluate prompts')
125
125
  .requiredOption('-p, --prompts <paths...>', 'Paths to prompt files (.txt)', config.prompts)
126
- .requiredOption(
126
+ .option(
127
127
  '-r, --providers <name or path...>',
128
128
  'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
129
- config?.providers,
130
129
  )
131
130
  .option(
132
131
  '-c, --config <path>',
@@ -243,7 +242,7 @@ async function main() {
243
242
  };
244
243
 
245
244
  const options: EvaluateOptions = {
246
- showProgressBar: true,
245
+ showProgressBar: getLogLevel() !== 'debug',
247
246
  maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
248
247
  ...evaluateOptions,
249
248
  };
@@ -261,43 +260,9 @@ async function main() {
261
260
  if (cmdObj.output) {
262
261
  logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
263
262
  writeOutput(cmdObj.output, summary);
264
- } else {
263
+ } else if (getLogLevel() !== 'debug') {
265
264
  // Output table by default
266
- const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
267
- const head = summary.table.head;
268
- const headLength = head.prompts.length + head.vars.length;
269
- const table = new Table({
270
- head: [...head.prompts, ...head.vars],
271
- colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
272
- wordWrap: true,
273
- wrapOnWordBoundary: false,
274
- style: {
275
- head: ['blue', 'bold'],
276
- },
277
- });
278
- // Skip first row (header) and add the rest. Color PASS/FAIL
279
- for (const row of summary.table.body.slice(0, 25)) {
280
- table.push([
281
- ...row.vars,
282
- ...row.outputs.map((col) => {
283
- const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
284
- if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
285
- col = col.slice(0, tableCellMaxLength) + '...';
286
- }
287
- if (col.startsWith('[PASS]')) {
288
- // color '[PASS]' green
289
- return chalk.green.bold(col.slice(0, 6)) + col.slice(6);
290
- } else if (col.startsWith('[FAIL]')) {
291
- // color everything red up until '---'
292
- return col
293
- .split('---')
294
- .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
295
- .join('---');
296
- }
297
- return col;
298
- }),
299
- ]);
300
- }
265
+ const table = generateTable(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));
301
266
 
302
267
  logger.info('\n' + table.toString());
303
268
  if (summary.table.body.length > 25) {
package/src/onboarding.ts CHANGED
@@ -9,6 +9,12 @@ These prompts are nunjucks templates, so you can use logic like this:
9
9
  {{ var1 }}
10
10
  {% endif %}
11
11
  ---
12
+ [
13
+ {"role": "system", "content": "Use JSON too for more complex payloads"},
14
+ {"role": "user", "content": "Such as multi-shot prompts"}
15
+ {"role": "user", "content": "Variable substitution still works: {{ var3 }}"}
16
+ ]
17
+ ---
12
18
  If you prefer, you can break prompts into multiple files (make sure to edit promptfooconfig.yaml accordingly)
13
19
  `;
14
20
 
package/src/providers/openai.ts CHANGED
@@ -7,7 +7,13 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
7
7
  const DEFAULT_OPENAI_HOST = 'api.openai.com';
8
8
 
9
9
  interface OpenAiCompletionOptions {
10
- temperature: number;
10
+ temperature?: number;
11
+ functions?: {
12
+ name: string;
13
+ description?: string;
14
+ parameters: any;
15
+ }[];
16
+ function_call?: 'none' | 'auto';
11
17
  }
12
18
 
13
19
  class OpenAiGenericProvider implements ApiProvider {
@@ -112,11 +118,14 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
112
118
  'text-ada-001',
113
119
  ];
114
120
 
115
- constructor(modelName: string, apiKey?: string) {
121
+ options: OpenAiCompletionOptions;
122
+
123
+ constructor(modelName: string, apiKey?: string, context?: OpenAiCompletionOptions) {
116
124
  if (!OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelName)) {
117
125
  logger.warn(`Using unknown OpenAI completion model: ${modelName}`);
118
126
  }
119
127
  super(modelName, apiKey);
128
+ this.options = context || {};
120
129
  }
121
130
 
122
131
  async callApi(prompt: string, options?: OpenAiCompletionOptions): Promise<ProviderResponse> {
@@ -138,7 +147,10 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
138
147
  model: this.modelName,
139
148
  prompt,
140
149
  max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
141
- temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
150
+ temperature:
151
+ options?.temperature ??
152
+ this.options.temperature ??
153
+ parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
142
154
  stop,
143
155
  };
144
156
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
@@ -186,17 +198,22 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
186
198
  static OPENAI_CHAT_MODELS = [
187
199
  'gpt-4',
188
200
  'gpt-4-0314',
201
+ 'gpt-4-0613',
189
202
  'gpt-4-32k',
190
203
  'gpt-4-32k-0314',
191
204
  'gpt-3.5-turbo',
192
205
  'gpt-3.5-turbo-0301',
206
+ 'gpt-3.5-turbo-0613',
193
207
  ];
194
208
 
195
- constructor(modelName: string, apiKey?: string) {
209
+ options: OpenAiCompletionOptions;
210
+
211
+ constructor(modelName: string, apiKey?: string, context?: OpenAiCompletionOptions) {
196
212
  if (!OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelName)) {
197
213
  logger.warn(`Using unknown OpenAI chat model: ${modelName}`);
198
214
  }
199
215
  super(modelName, apiKey);
216
+ this.options = context || {};
200
217
  }
201
218
 
202
219
  // TODO(ian): support passing in `messages` directly
@@ -215,11 +232,17 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
215
232
  } catch (err) {
216
233
  messages = [{ role: 'user', content: prompt }];
217
234
  }
235
+
218
236
  const body = {
219
237
  model: this.modelName,
220
238
  messages: messages,
221
239
  max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
222
- temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
240
+ temperature:
241
+ options?.temperature ??
242
+ this.options.temperature ??
243
+ parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
244
+ functions: options?.functions || this.options.functions || undefined,
245
+ function_call: options?.function_call || this.options.function_call || undefined,
223
246
  };
224
247
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
225
248
 
@@ -246,8 +269,11 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
246
269
 
247
270
  logger.debug(`\tOpenAI API response: ${JSON.stringify(data)}`);
248
271
  try {
272
+ const message = data.choices[0].message;
273
+ const output =
274
+ message.content === null ? JSON.stringify(message.function_call) : message.content;
249
275
  return {
250
- output: data.choices[0].message.content,
276
+ output,
251
277
  tokenUsage: cached
252
278
  ? { cached: data.usage.total_tokens }
253
279
  : {
package/src/providers.ts CHANGED
@@ -1,20 +1,35 @@
1
1
  import path from 'node:path';
2
2
 
3
- import { ApiProvider } from './types';
3
+ import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './types';
4
4
 
5
5
  import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
6
6
  import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
7
7
 
8
- export async function loadApiProviders(providerPaths: string | string[]): Promise<ApiProvider[]> {
8
+ export async function loadApiProviders(
9
+ providerPaths: ProviderId | ProviderId[] | RawProviderConfig[],
10
+ ): Promise<ApiProvider[]> {
9
11
  if (typeof providerPaths === 'string') {
10
12
  return [await loadApiProvider(providerPaths)];
11
13
  } else if (Array.isArray(providerPaths)) {
12
- return Promise.all(providerPaths.map((provider) => loadApiProvider(provider)));
14
+ return Promise.all(
15
+ providerPaths.map((provider) => {
16
+ if (typeof provider === 'string') {
17
+ return loadApiProvider(provider);
18
+ } else {
19
+ const id = Object.keys(provider)[0];
20
+ const context = { ...provider[id], id };
21
+ return loadApiProvider(id, context);
22
+ }
23
+ }),
24
+ );
13
25
  }
14
26
  throw new Error('Invalid providers list');
15
27
  }
16
28
 
17
- export async function loadApiProvider(providerPath: string): Promise<ApiProvider> {
29
+ export async function loadApiProvider(
30
+ providerPath: string,
31
+ context: ProviderConfig | undefined = undefined,
32
+ ): Promise<ApiProvider> {
18
33
  if (providerPath?.startsWith('openai:')) {
19
34
  // Load OpenAI module
20
35
  const options = providerPath.split(':');
@@ -22,13 +37,21 @@ export async function loadApiProvider(providerPath: string): Promise<ApiProvider
22
37
  const modelName = options[2];
23
38
 
24
39
  if (modelType === 'chat') {
25
- return new OpenAiChatCompletionProvider(modelName || 'gpt-3.5-turbo');
40
+ return new OpenAiChatCompletionProvider(
41
+ modelName || 'gpt-3.5-turbo',
42
+ undefined,
43
+ context?.config,
44
+ );
26
45
  } else if (modelType === 'completion') {
27
- return new OpenAiCompletionProvider(modelName || 'text-davinci-003');
46
+ return new OpenAiCompletionProvider(
47
+ modelName || 'text-davinci-003',
48
+ undefined,
49
+ context?.config,
50
+ );
28
51
  } else if (OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelType)) {
29
- return new OpenAiChatCompletionProvider(modelType);
52
+ return new OpenAiChatCompletionProvider(modelType, undefined, context?.config);
30
53
  } else if (OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelType)) {
31
- return new OpenAiCompletionProvider(modelType);
54
+ return new OpenAiCompletionProvider(modelType, undefined, context?.config);
32
55
  } else {
33
56
  throw new Error(
34
57
  `Unknown OpenAI model type: ${modelType}. Use one of the following providers: openai:chat:<model name>, openai:completion:<model name>`,
@@ -52,7 +75,7 @@ export async function loadApiProvider(providerPath: string): Promise<ApiProvider
52
75
 
53
76
  // Load custom module
54
77
  const CustomApiProvider = (await import(path.join(process.cwd(), providerPath))).default;
55
- return new CustomApiProvider();
78
+ return new CustomApiProvider(context);
56
79
  }
57
80
 
58
81
  export default {
package/src/table.ts ADDED
@@ -0,0 +1,41 @@
1
+ import Table from 'cli-table3';
2
+ import chalk from 'chalk';
3
+ import type { EvaluateSummary } from './types';
4
+
5
+ export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250, maxRows = 25) {
6
+ const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
7
+ const head = summary.table.head;
8
+ const headLength = head.prompts.length + head.vars.length;
9
+ const table = new Table({
10
+ head: [...head.prompts, ...head.vars],
11
+ colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
12
+ wordWrap: true,
13
+ wrapOnWordBoundary: false,
14
+ style: {
15
+ head: ['blue', 'bold'],
16
+ },
17
+ });
18
+ // Skip first row (header) and add the rest. Color PASS/FAIL
19
+ for (const row of summary.table.body.slice(0, maxRows)) {
20
+ table.push([
21
+ ...row.vars,
22
+ ...row.outputs.map((col) => {
23
+ if (col.length > tableCellMaxLength) {
24
+ col = col.slice(0, tableCellMaxLength) + '...';
25
+ }
26
+ if (col.startsWith('[PASS]')) {
27
+ // color '[PASS]' green
28
+ return chalk.green.bold(col.slice(0, 6)) + col.slice(6);
29
+ } else if (col.startsWith('[FAIL]')) {
30
+ // color everything red up until '---'
31
+ return col
32
+ .split('---')
33
+ .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
34
+ .join('---');
35
+ }
36
+ return col;
37
+ }),
38
+ ]);
39
+ }
40
+ return table;
41
+ }
package/src/types.ts CHANGED
@@ -23,6 +23,11 @@ export interface CommandLineOptions {
23
23
  promptSuffix?: string;
24
24
  }
25
25
 
26
+ export interface ProviderConfig {
27
+ id: ProviderId;
28
+ config?: any;
29
+ }
30
+
26
31
  export interface ApiProvider {
27
32
  id: () => string;
28
33
  callApi: (prompt: string) => Promise<ProviderResponse>;
@@ -187,13 +192,17 @@ export interface TestSuite {
187
192
  defaultTest?: Partial<TestCase>;
188
193
  }
189
194
 
195
+ export type ProviderId = string;
196
+
197
+ export type RawProviderConfig = Record<ProviderId, Omit<ProviderConfig, 'id'>>;
198
+
190
199
  // TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
191
200
  export interface TestSuiteConfig {
192
201
  // Optional description of what your LLM is trying to do
193
202
  description?: string;
194
203
 
195
204
  // One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
196
- providers: string | string[];
205
+ providers: ProviderId | ProviderId[] | RawProviderConfig[];
197
206
 
198
207
  // One or more prompt files to load
199
208
  prompts: string | string[];
package/src/util.ts CHANGED
@@ -96,11 +96,22 @@ export function readPrompts(
96
96
  promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
97
97
  } else {
98
98
  const fileContent = fs.readFileSync(promptPath, 'utf-8');
99
- let display;
99
+
100
+ let display: string | undefined;
100
101
  if (inputType === PromptInputType.NAMED) {
101
102
  display = (promptPathOrGlobs as Record<string, string>)[promptPath];
102
103
  } else {
103
104
  display = fileContent.length > 200 ? promptPath : fileContent;
105
+
106
+ const ext = path.parse(promptPath).ext;
107
+ if (ext === '.jsonl') {
108
+ // Special case for JSONL file
109
+ const jsonLines = fileContent.split(/\r?\n/).filter((line) => line.length > 0);
110
+ for (const json of jsonLines) {
111
+ promptContents.push({ raw: json, display: json });
112
+ }
113
+ continue;
114
+ }
104
115
  }
105
116
  promptContents.push({ raw: fileContent, display });
106
117
  }
package/src/web/client/src/App.tsx CHANGED
@@ -45,7 +45,11 @@ function App() {
45
45
  loadedFromApi.current = true;
46
46
  const response = await fetch(`https://api.promptfoo.dev/eval/${id}`);
47
47
  const body = await response.json();
48
- setTable(body.data.results.table);
48
+ setTable(
49
+ body.data.results?.table ||
50
+ // Backwards compatibility with <= 0.12.0
51
+ body.data.table,
52
+ );
49
53
  setConfig(body.data.config);
50
54
  setLoaded(true);
51
55
  };
package/src/web/client/src/ResultsView.tsx CHANGED
@@ -38,7 +38,7 @@ const ResponsiveStack = styled(Stack)(({ theme }) => ({
38
38
  }));
39
39
 
40
40
  export default function ResultsView() {
41
- const { table } = useStore();
41
+ const { table, config } = useStore();
42
42
  const [maxTextLength, setMaxTextLength] = React.useState(250);
43
43
  const [columnVisibility, setColumnVisibility] = React.useState<VisibilityState>({});
44
44
  const [selectedColumns, setSelectedColumns] = React.useState<string[]>([]);
@@ -204,15 +204,17 @@ export default function ResultsView() {
204
204
  <Box flexGrow={1} />
205
205
  <Box display="flex" justifyContent="flex-end">
206
206
  <ResponsiveStack direction="row" spacing={2}>
207
- <Tooltip title="View config">
208
- <Button
209
- color="primary"
210
- onClick={() => setConfigModalOpen(true)}
211
- startIcon={<VisibilityIcon />}
212
- >
213
- Config
214
- </Button>
215
- </Tooltip>
207
+ {config && (
208
+ <Tooltip title="View config">
209
+ <Button
210
+ color="primary"
211
+ onClick={() => setConfigModalOpen(true)}
212
+ startIcon={<VisibilityIcon />}
213
+ >
214
+ Config
215
+ </Button>
216
+ </Tooltip>
217
+ )}
216
218
  <Tooltip title="Generate a unique URL that others can access">
217
219
  <Button
218
220
  color="primary"