promptfoo 0.17.6 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/dist/package.json +2 -2
  2. package/dist/src/assertions.js +2 -2
  3. package/dist/src/assertions.js.map +1 -1
  4. package/dist/src/evaluator.d.ts.map +1 -1
  5. package/dist/src/evaluator.js +37 -6
  6. package/dist/src/evaluator.js.map +1 -1
  7. package/dist/src/main.js +4 -0
  8. package/dist/src/main.js.map +1 -1
  9. package/dist/src/providers/azureopenai.d.ts +4 -0
  10. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  11. package/dist/src/providers/azureopenai.js +15 -0
  12. package/dist/src/providers/azureopenai.js.map +1 -1
  13. package/dist/src/providers/openai.d.ts +5 -0
  14. package/dist/src/providers/openai.d.ts.map +1 -1
  15. package/dist/src/providers/openai.js +21 -2
  16. package/dist/src/providers/openai.js.map +1 -1
  17. package/dist/src/providers/replicate.d.ts +8 -1
  18. package/dist/src/providers/replicate.d.ts.map +1 -1
  19. package/dist/src/providers/replicate.js +9 -6
  20. package/dist/src/providers/replicate.js.map +1 -1
  21. package/dist/src/providers/shared.d.ts.map +1 -1
  22. package/dist/src/providers/shared.js.map +1 -1
  23. package/dist/src/providers.js +1 -1
  24. package/dist/src/providers.js.map +1 -1
  25. package/dist/src/types.d.ts +6 -1
  26. package/dist/src/types.d.ts.map +1 -1
  27. package/dist/src/util.d.ts +8 -1
  28. package/dist/src/util.d.ts.map +1 -1
  29. package/dist/src/util.js +81 -26
  30. package/dist/src/util.js.map +1 -1
  31. package/dist/src/web/client/assets/{index-13198388.js → index-0c6f887d.js} +25 -25
  32. package/dist/src/web/client/index.html +1 -1
  33. package/dist/src/web/server.d.ts.map +1 -1
  34. package/dist/src/web/server.js +26 -3
  35. package/dist/src/web/server.js.map +1 -1
  36. package/package.json +2 -2
  37. package/src/assertions.ts +2 -2
  38. package/src/evaluator.ts +42 -6
  39. package/src/main.ts +6 -0
  40. package/src/providers/azureopenai.ts +24 -0
  41. package/src/providers/openai.ts +33 -3
  42. package/src/providers/replicate.ts +20 -7
  43. package/src/providers/shared.ts +3 -1
  44. package/src/providers.ts +1 -1
  45. package/src/types.ts +10 -1
  46. package/src/util.ts +95 -27
  47. package/src/web/client/src/App.tsx +24 -1
  48. package/src/web/client/src/ResultsView.tsx +42 -3
  49. package/src/web/server.ts +33 -10
  50. package/src/web/client/package-lock.json +0 -5726
@@ -5,7 +5,7 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-13198388.js"></script>
8
+ <script type="module" crossorigin src="/assets/index-0c6f887d.js"></script>
9
9
  <link rel="stylesheet" href="/assets/index-f9b230d1.css">
10
10
  </head>
11
11
  <body>
@@ -1 +1 @@
1
- {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QA0DhC"}
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QAiFhC"}
@@ -37,11 +37,34 @@ function init(port = 15500) {
37
37
  // Send the initial table data when a client connects
38
38
  socket.emit('init', readLatestJson());
39
39
  // Watch for changes to latest.json and emit the update event
40
- fs_1.default.watch(latestJsonPath, (0, debounce_1.default)((event) => {
41
- if (event === 'change') {
40
+ const watcher = (0, debounce_1.default)((curr, prev) => {
41
+ if (curr.mtime !== prev.mtime) {
42
42
  socket.emit('update', readLatestJson());
43
43
  }
44
- }, 250));
44
+ }, 250);
45
+ fs_1.default.watchFile(latestJsonPath, watcher);
46
+ // Stop watching the file when the socket connection is closed
47
+ socket.on('disconnect', () => {
48
+ fs_1.default.unwatchFile(latestJsonPath, watcher);
49
+ });
50
+ });
51
+ app.get('/results', (req, res) => {
52
+ const previousResults = (0, util_1.listPreviousResults)();
53
+ res.json({ data: previousResults });
54
+ });
55
+ app.get('/results/:filename', (req, res) => {
56
+ const filename = req.params.filename;
57
+ const safeFilename = node_path_1.default.basename(filename);
58
+ if (safeFilename !== filename || !(0, util_1.listPreviousResults)().includes(safeFilename)) {
59
+ res.status(400).send('Invalid filename');
60
+ return;
61
+ }
62
+ const result = (0, util_1.readResult)(safeFilename);
63
+ if (!result) {
64
+ res.status(404).send('Result not found');
65
+ return;
66
+ }
67
+ res.json({ data: result });
45
68
  });
46
69
  httpServer.listen(port, () => {
47
70
  const url = `http://localhost:${port}`;
@@ -1 +1 @@
1
- {"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAA+C;AAE/C,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,YAAE,CAAC,KAAK,CACN,cAAc,EACd,IAAA,kBAAQ,EAAC,CAAC,KAAa,EAAE,EAAE;YACzB,IAAI,KAAK,KAAK,QAAQ,EAAE;gBACtB,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAi
C,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AA1DD,oBA0DC"}
1
+ {"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAA+B;AAC/B,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAAgF;AAEhF,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,MAAM,OAAO,GAAG,IAAA,kBAAQ,EAAC,CAAC,IAAW,EAAE,IAAW,EAAE,EAAE;YACpD,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,EAAE;gBAC7B,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACR,YAAE,CAAC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAEtC,8DAA8D;QAC9D,MAAM,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE;YAC3B,YAAE,CAAC,WAAW,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QAC/B,MAAM,eAAe,GAAG,IAAA,0BAAmB,GAAE,CAAC;QAC9C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QACzC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QACrC,MAAM,YAAY,GAAG,mBAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,YAAY,KAAK,QAAQ,IAAI,CAAC,IAAA,0BAAmB,GAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE;YAC9E,GAAG,CAAC,MAAM,CAAC
,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,MAAM,MAAM,GAAG,IAAA,iBAAU,EAAC,YAAY,CAAC,CAAC;QACxC,IAAI,CAAC,MAAM,EAAE;YACX,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAjFD,oBAiFC"}
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.17.6",
5
+ "version": "0.17.8",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
@@ -19,7 +19,7 @@
19
19
  "src"
20
20
  ],
21
21
  "engines": {
22
- "node": ">=12"
22
+ "node": ">=16"
23
23
  },
24
24
  "bin": {
25
25
  "promptfoo": "dist/src/main.js"
package/src/assertions.ts CHANGED
@@ -432,8 +432,8 @@ export async function matchesLlmRubric(
432
432
  }
433
433
 
434
434
  const prompt = nunjucks.renderString(options.rubricPrompt || DEFAULT_GRADING_PROMPT, {
435
- output,
436
- rubric: expected,
435
+ output: output.replace(/\n/g, '\\n').replace(/"/g, '\\"'),
436
+ rubric: expected.replace(/\n/g, '\\n').replace(/"/g, '\\"'),
437
437
  });
438
438
 
439
439
  let provider = options.provider || DefaultGradingProvider;
package/src/evaluator.ts CHANGED
@@ -3,6 +3,7 @@ import readline from 'readline';
3
3
  import async from 'async';
4
4
  import chalk from 'chalk';
5
5
  import nunjucks from 'nunjucks';
6
+ import invariant from 'tiny-invariant';
6
7
 
7
8
  import logger from './logger';
8
9
  import telemetry from './telemetry';
@@ -121,7 +122,23 @@ class Evaluator {
121
122
  if (response.error) {
122
123
  ret.error = response.error;
123
124
  } else if (response.output) {
124
- const checkResult = await runAssertions(test, response.output);
125
+ // Create a copy of response so we can potentially mutate it.
126
+ let processedResponse = { ...response };
127
+ if (test.options?.postprocess) {
128
+ const { postprocess } = test.options;
129
+ const postprocessFn = new Function(
130
+ 'output',
131
+ 'context',
132
+ postprocess.includes('\n') ? postprocess : `return ${postprocess}`,
133
+ );
134
+ processedResponse.output = postprocessFn(processedResponse.output);
135
+ if (processedResponse.output == null) {
136
+ throw new Error('Postprocess function did not return a value');
137
+ }
138
+ }
139
+
140
+ invariant(processedResponse.output != null, 'Response output should not be null');
141
+ const checkResult = await runAssertions(test, processedResponse.output);
125
142
  if (!checkResult.pass) {
126
143
  ret.error = checkResult.reason;
127
144
  }
@@ -132,6 +149,7 @@ class Evaluator {
132
149
  this.stats.tokenUsage.prompt += checkResult.tokensUsed.prompt;
133
150
  this.stats.tokenUsage.completion += checkResult.tokensUsed.completion;
134
151
  }
152
+ ret.response = processedResponse;
135
153
  } else {
136
154
  ret.success = false;
137
155
  ret.score = 0;
@@ -213,6 +231,13 @@ class Evaluator {
213
231
  // Split prompts by provider
214
232
  for (const prompt of testSuite.prompts) {
215
233
  for (const provider of testSuite.providers) {
234
+ // Check if providerPromptMap exists and if it contains the current prompt's display
235
+ if (testSuite.providerPromptMap) {
236
+ const allowedPrompts = testSuite.providerPromptMap[provider.id()];
237
+ if (allowedPrompts && !allowedPrompts.includes(prompt.display)) {
238
+ continue;
239
+ }
240
+ }
216
241
  const updatedDisplay =
217
242
  testSuite.providers.length > 1 ? `[${provider.id()}] ${prompt.display}` : prompt.display;
218
243
  prompts.push({
@@ -225,11 +250,13 @@ class Evaluator {
225
250
  // Aggregate all vars across test cases
226
251
 
227
252
  const tests = (
228
- testSuite.tests || [
229
- {
230
- // Dummy test for cases when we're only comparing raw prompts.
231
- },
232
- ]
253
+ testSuite.tests && testSuite.tests.length > 0
254
+ ? testSuite.tests
255
+ : [
256
+ {
257
+ // Dummy test for cases when we're only comparing raw prompts.
258
+ },
259
+ ]
233
260
  ).map((test) => {
234
261
  const finalTestCase: TestCase = Object.assign({}, testSuite.defaultTest);
235
262
  return Object.assign(finalTestCase, test);
@@ -263,6 +290,8 @@ class Evaluator {
263
290
  testCase.options?.prefix || testSuite.defaultTest?.options?.prefix || '';
264
291
  const appendToPrompt =
265
292
  testCase.options?.suffix || testSuite.defaultTest?.options?.suffix || '';
293
+ testCase.options.postprocess =
294
+ testCase.options.postprocess || testSuite.defaultTest?.options?.postprocess;
266
295
 
267
296
  // Finalize test case eval
268
297
  const varCombinations = generateVarCombinations(testCase.vars || {});
@@ -274,6 +303,13 @@ class Evaluator {
274
303
  let colIndex = 0;
275
304
  for (const prompt of testSuite.prompts) {
276
305
  for (const provider of testSuite.providers) {
306
+ if (testSuite.providerPromptMap) {
307
+ const allowedPrompts = testSuite.providerPromptMap[provider.id()];
308
+ if (allowedPrompts && !allowedPrompts.includes(prompt.display)) {
309
+ // This prompt should not be used with this provider.
310
+ continue;
311
+ }
312
+ }
277
313
  runEvalOptions.push({
278
314
  provider,
279
315
  prompt: {
package/src/main.ts CHANGED
@@ -11,10 +11,12 @@ import logger, { getLogLevel, setLogLevel } from './logger';
11
11
  import { loadApiProvider, loadApiProviders } from './providers';
12
12
  import { evaluate } from './evaluator';
13
13
  import {
14
+ cleanupOldResults,
14
15
  maybeReadConfig,
15
16
  readConfig,
16
17
  readLatestResults,
17
18
  readPrompts,
19
+ readProviderPromptMap,
18
20
  readTests,
19
21
  writeLatestResults,
20
22
  writeOutput,
@@ -180,6 +182,7 @@ async function main() {
180
182
  .action(async () => {
181
183
  telemetry.maybeShowNotice();
182
184
  await clearCache();
185
+ cleanupOldResults(0);
183
186
  telemetry.record('command_used', {
184
187
  name: 'cache_clear',
185
188
  });
@@ -307,6 +310,7 @@ async function main() {
307
310
  config.tests,
308
311
  cmdObj.tests ? undefined : basePath,
309
312
  );
313
+ const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);
310
314
 
311
315
  if (parsedPrompts.length === 0) {
312
316
  logger.error(chalk.red('No prompts found'));
@@ -319,6 +323,7 @@ async function main() {
319
323
  suffix: cmdObj.promptSuffix,
320
324
  provider: cmdObj.grader,
321
325
  // rubricPrompt:
326
+ // postprocess
322
327
  },
323
328
  ...config.defaultTest,
324
329
  };
@@ -327,6 +332,7 @@ async function main() {
327
332
  description: config.description,
328
333
  prompts: parsedPrompts,
329
334
  providers: parsedProviders,
335
+ providerPromptMap: parsedProviderPromptMap,
330
336
  tests: parsedTests,
331
337
  defaultTest,
332
338
  };
@@ -6,6 +6,10 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
6
6
 
7
7
  interface AzureOpenAiCompletionOptions {
8
8
  temperature?: number;
9
+ top_p?: number;
10
+ frequency_penalty?: number;
11
+ presence_penalty?: number;
12
+ best_of?: number;
9
13
  functions?: {
10
14
  name: string;
11
15
  description?: string;
@@ -144,6 +148,17 @@ export class AzureOpenAiCompletionProvider extends AzureOpenAiGenericProvider {
144
148
  options?.temperature ??
145
149
  this.options.temperature ??
146
150
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
151
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
152
+ presence_penalty:
153
+ options?.presence_penalty ??
154
+ this.options.presence_penalty ??
155
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
156
+ frequency_penalty:
157
+ options?.frequency_penalty ??
158
+ this.options.frequency_penalty ??
159
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
160
+ best_of:
161
+ options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
147
162
  stop,
148
163
  };
149
164
  logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
@@ -214,6 +229,15 @@ export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvide
214
229
  options?.temperature ??
215
230
  this.options.temperature ??
216
231
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
232
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
233
+ presence_penalty:
234
+ options?.presence_penalty ??
235
+ this.options.presence_penalty ??
236
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
237
+ frequency_penalty:
238
+ options?.frequency_penalty ??
239
+ this.options.frequency_penalty ??
240
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
217
241
  functions: options?.functions || this.options.functions || undefined,
218
242
  function_call: options?.function_call || this.options.function_call || undefined,
219
243
  };
@@ -1,4 +1,3 @@
1
-
2
1
  import logger from '../logger';
3
2
  import { fetchJsonWithCache } from '../cache';
4
3
  import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
@@ -9,6 +8,11 @@ const DEFAULT_OPENAI_HOST = 'api.openai.com';
9
8
 
10
9
  interface OpenAiCompletionOptions {
11
10
  temperature?: number;
11
+ max_tokens?: number;
12
+ top_p?: number;
13
+ frequency_penalty?: number;
14
+ presence_penalty?: number;
15
+ best_of?: number;
12
16
  functions?: {
13
17
  name: string;
14
18
  description?: string;
@@ -147,11 +151,25 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
147
151
  const body = {
148
152
  model: this.modelName,
149
153
  prompt,
150
- max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
154
+ max_tokens:
155
+ options?.max_tokens ??
156
+ this.options.max_tokens ??
157
+ parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
151
158
  temperature:
152
159
  options?.temperature ??
153
160
  this.options.temperature ??
154
161
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
162
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
163
+ presence_penalty:
164
+ options?.presence_penalty ??
165
+ this.options.presence_penalty ??
166
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
167
+ frequency_penalty:
168
+ options?.frequency_penalty ??
169
+ this.options.frequency_penalty ??
170
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
171
+ best_of:
172
+ options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
155
173
  stop,
156
174
  };
157
175
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
@@ -230,11 +248,23 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
230
248
  const body = {
231
249
  model: this.modelName,
232
250
  messages: messages,
233
- max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
251
+ max_tokens:
252
+ options?.max_tokens ??
253
+ this.options.max_tokens ??
254
+ parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
234
255
  temperature:
235
256
  options?.temperature ??
236
257
  this.options.temperature ??
237
258
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
259
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
260
+ presence_penalty:
261
+ options?.presence_penalty ??
262
+ this.options.presence_penalty ??
263
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
264
+ frequency_penalty:
265
+ options?.frequency_penalty ??
266
+ this.options.frequency_penalty ??
267
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
238
268
  functions: options?.functions || this.options.functions || undefined,
239
269
  function_call: options?.function_call || this.options.function_call || undefined,
240
270
  };
@@ -6,14 +6,22 @@ import { getCache, isCacheEnabled } from '../cache';
6
6
 
7
7
  import type { ApiProvider, ProviderResponse } from '../types.js';
8
8
 
9
+ interface ReplicateCompletionOptions {
10
+ temperature?: number;
11
+ max_length?: number;
12
+ repetition_penalty?: number;
13
+ }
14
+
9
15
  export class ReplicateProvider implements ApiProvider {
10
16
  modelName: string;
11
17
  apiKey?: string;
12
18
  replicate: any;
19
+ options: ReplicateCompletionOptions;
13
20
 
14
- constructor(modelName: string, apiKey?: string) {
21
+ constructor(modelName: string, apiKey?: string, options?: ReplicateCompletionOptions) {
15
22
  this.modelName = modelName;
16
23
  this.apiKey = apiKey || process.env.REPLICATE_API_TOKEN || process.env.REPLICATE_API_KEY;
24
+ this.options = options || {};
17
25
  }
18
26
 
19
27
  id(): string {
@@ -24,7 +32,7 @@ export class ReplicateProvider implements ApiProvider {
24
32
  return `[Replicate Provider ${this.modelName}]`;
25
33
  }
26
34
 
27
- async callApi(prompt: string): Promise<ProviderResponse> {
35
+ async callApi(prompt: string): Promise<ProviderResponse> {
28
36
  if (!this.apiKey) {
29
37
  throw new Error(
30
38
  'Replicate API key is not set. Set REPLICATE_API_TOKEN environment variable or pass it as an argument to the constructor.',
@@ -54,14 +62,19 @@ export class ReplicateProvider implements ApiProvider {
54
62
  logger.debug(`Calling Replicate: ${prompt}`);
55
63
  let response;
56
64
  try {
57
- response = await replicate.run(this.modelName as any, {
65
+ const data = {
58
66
  input: {
59
67
  prompt,
60
- max_length: process.env.REPLICATE_MAX_LENGTH || 2046,
61
- temperature: process.env.REPLICATE_TEMPERATURE || 0.5,
62
- repetition_penalty: process.env.REPLICATE_REPETITION_PENALTY || 1.0,
68
+ max_length:
69
+ this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
70
+ temperature:
71
+ this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
72
+ repetition_penalty:
73
+ this.options.repetition_penalty ||
74
+ parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
63
75
  },
64
- });
76
+ };
77
+ response = await replicate.run(this.modelName as any, data);
65
78
  } catch (err) {
66
79
  return {
67
80
  error: `API call error: ${String(err)}`,
@@ -4,7 +4,9 @@ export const REQUEST_TIMEOUT_MS = process.env.REQUEST_TIMEOUT_MS
4
4
  ? parseInt(process.env.REQUEST_TIMEOUT_MS, 10)
5
5
  : 300_000;
6
6
 
7
- export function parseChatPrompt(prompt: string): { role: string; content: string; name?: string }[] {
7
+ export function parseChatPrompt(
8
+ prompt: string,
9
+ ): { role: string; content: string; name?: string }[] {
8
10
  const trimmedPrompt = prompt.trim();
9
11
  if (trimmedPrompt.startsWith('- role:')) {
10
12
  try {
package/src/providers.ts CHANGED
@@ -112,7 +112,7 @@ export async function loadApiProvider(
112
112
  const options = providerPath.split(':');
113
113
  const modelName = options.slice(1).join(':');
114
114
 
115
- return new ReplicateProvider(modelName, undefined);
115
+ return new ReplicateProvider(modelName, undefined, context?.config);
116
116
  }
117
117
 
118
118
  if (providerPath?.startsWith('localai:')) {
package/src/types.ts CHANGED
@@ -30,6 +30,7 @@ export interface CommandLineOptions {
30
30
  export interface ProviderConfig {
31
31
  id: ProviderId;
32
32
  config?: any;
33
+ prompts?: string[]; // List of prompt display strings
33
34
  }
34
35
 
35
36
  export interface ApiProvider {
@@ -72,6 +73,10 @@ export interface PromptConfig {
72
73
  suffix?: string;
73
74
  }
74
75
 
76
+ export interface OutputConfig {
77
+ postprocess?: string;
78
+ }
79
+
75
80
  export interface EvaluateOptions {
76
81
  maxConcurrency?: number;
77
82
  showProgressBar?: boolean;
@@ -184,7 +189,7 @@ export interface TestCase {
184
189
  assert?: Assertion[];
185
190
 
186
191
  // Additional configuration settings for the prompt
187
- options?: PromptConfig & GradingConfig;
192
+ options?: PromptConfig & OutputConfig & GradingConfig;
188
193
  }
189
194
 
190
195
  // Same as a TestCase, except the `vars` object has been flattened into its final form.
@@ -203,6 +208,10 @@ export interface TestSuite {
203
208
  // One or more prompt strings
204
209
  prompts: Prompt[];
205
210
 
211
+ // Optional mapping of provider to prompt display strings. If not provided,
212
+ // all prompts are used for all providers.
213
+ providerPromptMap?: Record<string, string[]>;
214
+
206
215
  // Test cases
207
216
  tests?: TestCase[];
208
217
 
package/src/util.ts CHANGED
@@ -25,8 +25,36 @@ import type {
25
25
  UnifiedConfig,
26
26
  TestCase,
27
27
  Prompt,
28
+ RawProviderConfig,
29
+ TestSuite,
28
30
  } from './types';
29
31
 
32
+ export function readProviderPromptMap(
33
+ config: Partial<UnifiedConfig>,
34
+ parsedPrompts: Prompt[],
35
+ ): TestSuite['providerPromptMap'] {
36
+ const ret: Record<string, string[]> = {};
37
+
38
+ if (!config.providers) {
39
+ return ret;
40
+ }
41
+
42
+ const allPrompts = [];
43
+ for (const prompt of parsedPrompts) {
44
+ allPrompts.push(prompt.display);
45
+ }
46
+
47
+ for (const provider of config.providers) {
48
+ if (typeof provider === 'object') {
49
+ const rawProvider = provider as RawProviderConfig;
50
+ const id = Object.keys(rawProvider)[0];
51
+ ret[id] = rawProvider[id].prompts || allPrompts;
52
+ }
53
+ }
54
+
55
+ return ret;
56
+ }
57
+
30
58
  const PROMPT_DELIMITER = '---';
31
59
 
32
60
  function parseJson(json: string): any | undefined {
@@ -288,28 +316,31 @@ export function writeOutput(
288
316
  }
289
317
  }
290
318
 
291
- export async function fetchWithTimeout(
319
+ export function fetchWithTimeout(
292
320
  url: RequestInfo,
293
321
  options: RequestInit = {},
294
322
  timeout: number,
295
323
  ): Promise<Response> {
296
- const controller = new AbortController();
297
- const { signal } = controller;
298
- options.signal = signal;
299
-
300
- const timeoutId = setTimeout(() => {
301
- controller.abort();
302
- throw new Error(`Request timed out after ${timeout} ms`);
303
- }, timeout);
304
-
305
- try {
306
- const response = await fetch(url, options);
307
- clearTimeout(timeoutId);
308
- return response;
309
- } catch (error) {
310
- clearTimeout(timeoutId);
311
- throw error;
312
- }
324
+ return new Promise((resolve, reject) => {
325
+ const controller = new AbortController();
326
+ const { signal } = controller;
327
+ options.signal = signal;
328
+
329
+ const timeoutId = setTimeout(() => {
330
+ controller.abort();
331
+ reject(new Error(`Request timed out after ${timeout} ms`));
332
+ }, timeout);
333
+
334
+ fetch(url, options)
335
+ .then((response) => {
336
+ clearTimeout(timeoutId);
337
+ resolve(response);
338
+ })
339
+ .catch((error) => {
340
+ clearTimeout(timeoutId);
341
+ reject(error);
342
+ });
343
+ });
313
344
  }
314
345
 
315
346
  export async function fetchWithRetries(
@@ -331,6 +362,8 @@ export async function fetchWithRetries(
331
362
  throw new Error(`Request failed after ${retries} retries: ${(lastError as Error).message}`);
332
363
  }
333
364
 
365
+ const RESULT_HISTORY_LENGTH = 50;
366
+
334
367
  export function getConfigDirectoryPath(): string {
335
368
  return path.join(os.homedir(), '.promptfoo');
336
369
  }
@@ -340,11 +373,14 @@ export function getLatestResultsPath(): string {
340
373
  }
341
374
 
342
375
  export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
376
+ const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
377
+ const timestamp = new Date().toISOString();
378
+ const newResultsPath = path.join(resultsDirectory, `eval-${timestamp}.json`);
343
379
  const latestResultsPath = getLatestResultsPath();
344
380
  try {
345
- fs.mkdirSync(path.dirname(latestResultsPath), { recursive: true });
381
+ fs.mkdirSync(resultsDirectory, { recursive: true });
346
382
  fs.writeFileSync(
347
- latestResultsPath,
383
+ newResultsPath,
348
384
  JSON.stringify(
349
385
  {
350
386
  version: 1,
@@ -355,8 +391,45 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
355
391
  2,
356
392
  ),
357
393
  );
394
+ if (fs.existsSync(latestResultsPath)) {
395
+ fs.unlinkSync(latestResultsPath);
396
+ }
397
+ fs.symlinkSync(newResultsPath, latestResultsPath);
398
+ cleanupOldResults();
358
399
  } catch (err) {
359
- logger.error(`Failed to write latest results to ${latestResultsPath}:\n${err}`);
400
+ logger.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
401
+ }
402
+ }
403
+
404
+ export function listPreviousResults(): string[] {
405
+ const directory = path.join(getConfigDirectoryPath(), 'output');
406
+ const files = fs.readdirSync(directory);
407
+ const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
408
+ const sortedFiles = resultsFiles.sort((a, b) => {
409
+ const statA = fs.statSync(path.join(directory, a));
410
+ const statB = fs.statSync(path.join(directory, b));
411
+ return statB.birthtime.getTime() - statA.birthtime.getTime(); // sort in descending order
412
+ });
413
+ return sortedFiles;
414
+ }
415
+
416
+ export function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
417
+ const sortedFiles = listPreviousResults();
418
+ for (let i = 0; i < sortedFiles.length - remaining; i++) {
419
+ fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFiles[i]));
420
+ }
421
+ }
422
+
423
+ export function readResult(
424
+ name: string,
425
+ ): { results: EvaluateSummary; config: Partial<UnifiedConfig> } | undefined {
426
+ const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
427
+ const resultsPath = path.join(resultsDirectory, name);
428
+ try {
429
+ const results = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
430
+ return results;
431
+ } catch (err) {
432
+ logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
360
433
  }
361
434
  }
362
435
 
@@ -364,12 +437,7 @@ export function readLatestResults():
364
437
  | { results: EvaluateSummary; config: Partial<UnifiedConfig> }
365
438
  | undefined {
366
439
  const latestResultsPath = getLatestResultsPath();
367
- try {
368
- const latestResults = JSON.parse(fs.readFileSync(latestResultsPath, 'utf-8'));
369
- return latestResults;
370
- } catch (err) {
371
- logger.error(`Failed to read latest results from ${latestResultsPath}:\n${err}`);
372
- }
440
+ return readResult(latestResultsPath);
373
441
  }
374
442
 
375
443
  export function cosineSimilarity(vecA: number[], vecB: number[]) {