promptfoo 0.17.7 → 0.17.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/package.json +1 -1
  2. package/dist/src/evaluator.d.ts.map +1 -1
  3. package/dist/src/evaluator.js +23 -6
  4. package/dist/src/evaluator.js.map +1 -1
  5. package/dist/src/main.js +2 -0
  6. package/dist/src/main.js.map +1 -1
  7. package/dist/src/providers/azureopenai.d.ts +4 -0
  8. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  9. package/dist/src/providers/azureopenai.js +15 -0
  10. package/dist/src/providers/azureopenai.js.map +1 -1
  11. package/dist/src/providers/openai.d.ts +4 -0
  12. package/dist/src/providers/openai.d.ts.map +1 -1
  13. package/dist/src/providers/openai.js +21 -2
  14. package/dist/src/providers/openai.js.map +1 -1
  15. package/dist/src/providers/replicate.d.ts.map +1 -1
  16. package/dist/src/providers/replicate.js +2 -1
  17. package/dist/src/providers/replicate.js.map +1 -1
  18. package/dist/src/providers/shared.d.ts.map +1 -1
  19. package/dist/src/providers/shared.js.map +1 -1
  20. package/dist/src/types.d.ts +4 -1
  21. package/dist/src/types.d.ts.map +1 -1
  22. package/dist/src/util.d.ts +7 -1
  23. package/dist/src/util.d.ts.map +1 -1
  24. package/dist/src/util.js +62 -26
  25. package/dist/src/util.js.map +1 -1
  26. package/dist/src/web/client/assets/{index-13198388.js → index-0c6f887d.js} +25 -25
  27. package/dist/src/web/client/index.html +1 -1
  28. package/dist/src/web/server.d.ts.map +1 -1
  29. package/dist/src/web/server.js +26 -3
  30. package/dist/src/web/server.js.map +1 -1
  31. package/package.json +1 -1
  32. package/src/evaluator.ts +28 -6
  33. package/src/main.ts +3 -0
  34. package/src/providers/azureopenai.ts +24 -0
  35. package/src/providers/openai.ts +32 -3
  36. package/src/providers/replicate.ts +7 -3
  37. package/src/providers/shared.ts +3 -1
  38. package/src/types.ts +5 -1
  39. package/src/util.ts +71 -28
  40. package/src/web/client/src/App.tsx +24 -1
  41. package/src/web/client/src/ResultsView.tsx +42 -3
  42. package/src/web/server.ts +33 -10
  43. package/src/web/client/package-lock.json +0 -5726
@@ -5,7 +5,7 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-13198388.js"></script>
8
+ <script type="module" crossorigin src="/assets/index-0c6f887d.js"></script>
9
9
  <link rel="stylesheet" href="/assets/index-f9b230d1.css">
10
10
  </head>
11
11
  <body>
@@ -1 +1 @@
1
- {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QA0DhC"}
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QAiFhC"}
@@ -37,11 +37,34 @@ function init(port = 15500) {
37
37
  // Send the initial table data when a client connects
38
38
  socket.emit('init', readLatestJson());
39
39
  // Watch for changes to latest.json and emit the update event
40
- fs_1.default.watch(latestJsonPath, (0, debounce_1.default)((event) => {
41
- if (event === 'change') {
40
+ const watcher = (0, debounce_1.default)((curr, prev) => {
41
+ if (curr.mtime !== prev.mtime) {
42
42
  socket.emit('update', readLatestJson());
43
43
  }
44
- }, 250));
44
+ }, 250);
45
+ fs_1.default.watchFile(latestJsonPath, watcher);
46
+ // Stop watching the file when the socket connection is closed
47
+ socket.on('disconnect', () => {
48
+ fs_1.default.unwatchFile(latestJsonPath, watcher);
49
+ });
50
+ });
51
+ app.get('/results', (req, res) => {
52
+ const previousResults = (0, util_1.listPreviousResults)();
53
+ res.json({ data: previousResults });
54
+ });
55
+ app.get('/results/:filename', (req, res) => {
56
+ const filename = req.params.filename;
57
+ const safeFilename = node_path_1.default.basename(filename);
58
+ if (safeFilename !== filename || !(0, util_1.listPreviousResults)().includes(safeFilename)) {
59
+ res.status(400).send('Invalid filename');
60
+ return;
61
+ }
62
+ const result = (0, util_1.readResult)(safeFilename);
63
+ if (!result) {
64
+ res.status(404).send('Result not found');
65
+ return;
66
+ }
67
+ res.json({ data: result });
45
68
  });
46
69
  httpServer.listen(port, () => {
47
70
  const url = `http://localhost:${port}`;
@@ -1 +1 @@
1
- {"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAA+C;AAE/C,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,YAAE,CAAC,KAAK,CACN,cAAc,EACd,IAAA,kBAAQ,EAAC,CAAC,KAAa,EAAE,EAAE;YACzB,IAAI,KAAK,KAAK,QAAQ,EAAE;gBACtB,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AA1DD,oBA0DC"}
1
+ {"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAA+B;AAC/B,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAAgF;AAEhF,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,MAAM,OAAO,GAAG,IAAA,kBAAQ,EAAC,CAAC,IAAW,EAAE,IAAW,EAAE,EAAE;YACpD,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,EAAE;gBAC7B,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACR,YAAE,CAAC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAEtC,8DAA8D;QAC9D,MAAM,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE;YAC3B,YAAE,CAAC,WAAW,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QAC/B,MAAM,eAAe,GAAG,IAAA,0BAAmB,GAAE,CAAC;QAC9C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QACzC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QACrC,MAAM,YAAY,GAAG,mBAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,YAAY,KAAK,QAAQ,IAAI,CAAC,IAAA,0BAAmB,GAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE;YAC9E,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,MAAM,MAAM,GAAG,IAAA,iBAAU,EAAC,YAAY,CAAC,CAAC;QACxC,IAAI,CAAC,MAAM,EAAE;YACX,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAjFD,oBAiFC"}
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.17.7",
5
+ "version": "0.17.8",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
package/src/evaluator.ts CHANGED
@@ -3,6 +3,7 @@ import readline from 'readline';
3
3
  import async from 'async';
4
4
  import chalk from 'chalk';
5
5
  import nunjucks from 'nunjucks';
6
+ import invariant from 'tiny-invariant';
6
7
 
7
8
  import logger from './logger';
8
9
  import telemetry from './telemetry';
@@ -121,7 +122,23 @@ class Evaluator {
121
122
  if (response.error) {
122
123
  ret.error = response.error;
123
124
  } else if (response.output) {
124
- const checkResult = await runAssertions(test, response.output);
125
+ // Create a copy of response so we can potentially mutate it.
126
+ let processedResponse = { ...response };
127
+ if (test.options?.postprocess) {
128
+ const { postprocess } = test.options;
129
+ const postprocessFn = new Function(
130
+ 'output',
131
+ 'context',
132
+ postprocess.includes('\n') ? postprocess : `return ${postprocess}`,
133
+ );
134
+ processedResponse.output = postprocessFn(processedResponse.output);
135
+ if (processedResponse.output == null) {
136
+ throw new Error('Postprocess function did not return a value');
137
+ }
138
+ }
139
+
140
+ invariant(processedResponse.output != null, 'Response output should not be null');
141
+ const checkResult = await runAssertions(test, processedResponse.output);
125
142
  if (!checkResult.pass) {
126
143
  ret.error = checkResult.reason;
127
144
  }
@@ -132,6 +149,7 @@ class Evaluator {
132
149
  this.stats.tokenUsage.prompt += checkResult.tokensUsed.prompt;
133
150
  this.stats.tokenUsage.completion += checkResult.tokensUsed.completion;
134
151
  }
152
+ ret.response = processedResponse;
135
153
  } else {
136
154
  ret.success = false;
137
155
  ret.score = 0;
@@ -232,11 +250,13 @@ class Evaluator {
232
250
  // Aggregate all vars across test cases
233
251
 
234
252
  const tests = (
235
- testSuite.tests || [
236
- {
237
- // Dummy test for cases when we're only comparing raw prompts.
238
- },
239
- ]
253
+ testSuite.tests && testSuite.tests.length > 0
254
+ ? testSuite.tests
255
+ : [
256
+ {
257
+ // Dummy test for cases when we're only comparing raw prompts.
258
+ },
259
+ ]
240
260
  ).map((test) => {
241
261
  const finalTestCase: TestCase = Object.assign({}, testSuite.defaultTest);
242
262
  return Object.assign(finalTestCase, test);
@@ -270,6 +290,8 @@ class Evaluator {
270
290
  testCase.options?.prefix || testSuite.defaultTest?.options?.prefix || '';
271
291
  const appendToPrompt =
272
292
  testCase.options?.suffix || testSuite.defaultTest?.options?.suffix || '';
293
+ testCase.options.postprocess =
294
+ testCase.options.postprocess || testSuite.defaultTest?.options?.postprocess;
273
295
 
274
296
  // Finalize test case eval
275
297
  const varCombinations = generateVarCombinations(testCase.vars || {});
package/src/main.ts CHANGED
@@ -11,6 +11,7 @@ import logger, { getLogLevel, setLogLevel } from './logger';
11
11
  import { loadApiProvider, loadApiProviders } from './providers';
12
12
  import { evaluate } from './evaluator';
13
13
  import {
14
+ cleanupOldResults,
14
15
  maybeReadConfig,
15
16
  readConfig,
16
17
  readLatestResults,
@@ -181,6 +182,7 @@ async function main() {
181
182
  .action(async () => {
182
183
  telemetry.maybeShowNotice();
183
184
  await clearCache();
185
+ cleanupOldResults(0);
184
186
  telemetry.record('command_used', {
185
187
  name: 'cache_clear',
186
188
  });
@@ -321,6 +323,7 @@ async function main() {
321
323
  suffix: cmdObj.promptSuffix,
322
324
  provider: cmdObj.grader,
323
325
  // rubricPrompt:
326
+ // postprocess
324
327
  },
325
328
  ...config.defaultTest,
326
329
  };
@@ -6,6 +6,10 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
6
6
 
7
7
  interface AzureOpenAiCompletionOptions {
8
8
  temperature?: number;
9
+ top_p?: number;
10
+ frequency_penalty?: number;
11
+ presence_penalty?: number;
12
+ best_of?: number;
9
13
  functions?: {
10
14
  name: string;
11
15
  description?: string;
@@ -144,6 +148,17 @@ export class AzureOpenAiCompletionProvider extends AzureOpenAiGenericProvider {
144
148
  options?.temperature ??
145
149
  this.options.temperature ??
146
150
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
151
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
152
+ presence_penalty:
153
+ options?.presence_penalty ??
154
+ this.options.presence_penalty ??
155
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
156
+ frequency_penalty:
157
+ options?.frequency_penalty ??
158
+ this.options.frequency_penalty ??
159
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
160
+ best_of:
161
+ options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
147
162
  stop,
148
163
  };
149
164
  logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
@@ -214,6 +229,15 @@ export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvide
214
229
  options?.temperature ??
215
230
  this.options.temperature ??
216
231
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
232
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
233
+ presence_penalty:
234
+ options?.presence_penalty ??
235
+ this.options.presence_penalty ??
236
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
237
+ frequency_penalty:
238
+ options?.frequency_penalty ??
239
+ this.options.frequency_penalty ??
240
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
217
241
  functions: options?.functions || this.options.functions || undefined,
218
242
  function_call: options?.function_call || this.options.function_call || undefined,
219
243
  };
@@ -1,4 +1,3 @@
1
-
2
1
  import logger from '../logger';
3
2
  import { fetchJsonWithCache } from '../cache';
4
3
  import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
@@ -10,6 +9,10 @@ const DEFAULT_OPENAI_HOST = 'api.openai.com';
10
9
  interface OpenAiCompletionOptions {
11
10
  temperature?: number;
12
11
  max_tokens?: number;
12
+ top_p?: number;
13
+ frequency_penalty?: number;
14
+ presence_penalty?: number;
15
+ best_of?: number;
13
16
  functions?: {
14
17
  name: string;
15
18
  description?: string;
@@ -148,11 +151,25 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
148
151
  const body = {
149
152
  model: this.modelName,
150
153
  prompt,
151
- max_tokens: options?.max_tokens ?? this.options.max_tokens ?? parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
154
+ max_tokens:
155
+ options?.max_tokens ??
156
+ this.options.max_tokens ??
157
+ parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
152
158
  temperature:
153
159
  options?.temperature ??
154
160
  this.options.temperature ??
155
161
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
162
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
163
+ presence_penalty:
164
+ options?.presence_penalty ??
165
+ this.options.presence_penalty ??
166
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
167
+ frequency_penalty:
168
+ options?.frequency_penalty ??
169
+ this.options.frequency_penalty ??
170
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
171
+ best_of:
172
+ options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
156
173
  stop,
157
174
  };
158
175
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
@@ -231,11 +248,23 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
231
248
  const body = {
232
249
  model: this.modelName,
233
250
  messages: messages,
234
- max_tokens: options?.max_tokens ?? this.options.max_tokens ?? parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
251
+ max_tokens:
252
+ options?.max_tokens ??
253
+ this.options.max_tokens ??
254
+ parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
235
255
  temperature:
236
256
  options?.temperature ??
237
257
  this.options.temperature ??
238
258
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
259
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
260
+ presence_penalty:
261
+ options?.presence_penalty ??
262
+ this.options.presence_penalty ??
263
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
264
+ frequency_penalty:
265
+ options?.frequency_penalty ??
266
+ this.options.frequency_penalty ??
267
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
239
268
  functions: options?.functions || this.options.functions || undefined,
240
269
  function_call: options?.function_call || this.options.function_call || undefined,
241
270
  };
@@ -65,9 +65,13 @@ export class ReplicateProvider implements ApiProvider {
65
65
  const data = {
66
66
  input: {
67
67
  prompt,
68
- max_length: this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
69
- temperature: this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
70
- repetition_penalty: this.options.repetition_penalty || parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
68
+ max_length:
69
+ this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
70
+ temperature:
71
+ this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
72
+ repetition_penalty:
73
+ this.options.repetition_penalty ||
74
+ parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
71
75
  },
72
76
  };
73
77
  response = await replicate.run(this.modelName as any, data);
@@ -4,7 +4,9 @@ export const REQUEST_TIMEOUT_MS = process.env.REQUEST_TIMEOUT_MS
4
4
  ? parseInt(process.env.REQUEST_TIMEOUT_MS, 10)
5
5
  : 300_000;
6
6
 
7
- export function parseChatPrompt(prompt: string): { role: string; content: string; name?: string }[] {
7
+ export function parseChatPrompt(
8
+ prompt: string,
9
+ ): { role: string; content: string; name?: string }[] {
8
10
  const trimmedPrompt = prompt.trim();
9
11
  if (trimmedPrompt.startsWith('- role:')) {
10
12
  try {
package/src/types.ts CHANGED
@@ -73,6 +73,10 @@ export interface PromptConfig {
73
73
  suffix?: string;
74
74
  }
75
75
 
76
+ export interface OutputConfig {
77
+ postprocess?: string;
78
+ }
79
+
76
80
  export interface EvaluateOptions {
77
81
  maxConcurrency?: number;
78
82
  showProgressBar?: boolean;
@@ -185,7 +189,7 @@ export interface TestCase {
185
189
  assert?: Assertion[];
186
190
 
187
191
  // Additional configuration settings for the prompt
188
- options?: PromptConfig & GradingConfig;
192
+ options?: PromptConfig & OutputConfig & GradingConfig;
189
193
  }
190
194
 
191
195
  // Same as a TestCase, except the `vars` object has been flattened into its final form.
package/src/util.ts CHANGED
@@ -29,7 +29,10 @@ import type {
29
29
  TestSuite,
30
30
  } from './types';
31
31
 
32
- export function readProviderPromptMap(config: Partial<UnifiedConfig>, parsedPrompts: Prompt[]): TestSuite["providerPromptMap"] {
32
+ export function readProviderPromptMap(
33
+ config: Partial<UnifiedConfig>,
34
+ parsedPrompts: Prompt[],
35
+ ): TestSuite['providerPromptMap'] {
33
36
  const ret: Record<string, string[]> = {};
34
37
 
35
38
  if (!config.providers) {
@@ -313,28 +316,31 @@ export function writeOutput(
313
316
  }
314
317
  }
315
318
 
316
- export async function fetchWithTimeout(
319
+ export function fetchWithTimeout(
317
320
  url: RequestInfo,
318
321
  options: RequestInit = {},
319
322
  timeout: number,
320
323
  ): Promise<Response> {
321
- const controller = new AbortController();
322
- const { signal } = controller;
323
- options.signal = signal;
324
-
325
- const timeoutId = setTimeout(() => {
326
- controller.abort();
327
- throw new Error(`Request timed out after ${timeout} ms`);
328
- }, timeout);
329
-
330
- try {
331
- const response = await fetch(url, options);
332
- clearTimeout(timeoutId);
333
- return response;
334
- } catch (error) {
335
- clearTimeout(timeoutId);
336
- throw error;
337
- }
324
+ return new Promise((resolve, reject) => {
325
+ const controller = new AbortController();
326
+ const { signal } = controller;
327
+ options.signal = signal;
328
+
329
+ const timeoutId = setTimeout(() => {
330
+ controller.abort();
331
+ reject(new Error(`Request timed out after ${timeout} ms`));
332
+ }, timeout);
333
+
334
+ fetch(url, options)
335
+ .then((response) => {
336
+ clearTimeout(timeoutId);
337
+ resolve(response);
338
+ })
339
+ .catch((error) => {
340
+ clearTimeout(timeoutId);
341
+ reject(error);
342
+ });
343
+ });
338
344
  }
339
345
 
340
346
  export async function fetchWithRetries(
@@ -356,6 +362,8 @@ export async function fetchWithRetries(
356
362
  throw new Error(`Request failed after ${retries} retries: ${(lastError as Error).message}`);
357
363
  }
358
364
 
365
+ const RESULT_HISTORY_LENGTH = 50;
366
+
359
367
  export function getConfigDirectoryPath(): string {
360
368
  return path.join(os.homedir(), '.promptfoo');
361
369
  }
@@ -365,11 +373,14 @@ export function getLatestResultsPath(): string {
365
373
  }
366
374
 
367
375
  export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
376
+ const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
377
+ const timestamp = new Date().toISOString();
378
+ const newResultsPath = path.join(resultsDirectory, `eval-${timestamp}.json`);
368
379
  const latestResultsPath = getLatestResultsPath();
369
380
  try {
370
- fs.mkdirSync(path.dirname(latestResultsPath), { recursive: true });
381
+ fs.mkdirSync(resultsDirectory, { recursive: true });
371
382
  fs.writeFileSync(
372
- latestResultsPath,
383
+ newResultsPath,
373
384
  JSON.stringify(
374
385
  {
375
386
  version: 1,
@@ -380,8 +391,45 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
380
391
  2,
381
392
  ),
382
393
  );
394
+ if (fs.existsSync(latestResultsPath)) {
395
+ fs.unlinkSync(latestResultsPath);
396
+ }
397
+ fs.symlinkSync(newResultsPath, latestResultsPath);
398
+ cleanupOldResults();
383
399
  } catch (err) {
384
- logger.error(`Failed to write latest results to ${latestResultsPath}:\n${err}`);
400
+ logger.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
401
+ }
402
+ }
403
+
404
+ export function listPreviousResults(): string[] {
405
+ const directory = path.join(getConfigDirectoryPath(), 'output');
406
+ const files = fs.readdirSync(directory);
407
+ const resultsFiles = files.filter((file) => file.startsWith('eval-') && file.endsWith('.json'));
408
+ const sortedFiles = resultsFiles.sort((a, b) => {
409
+ const statA = fs.statSync(path.join(directory, a));
410
+ const statB = fs.statSync(path.join(directory, b));
411
+ return statB.birthtime.getTime() - statA.birthtime.getTime(); // sort in descending order
412
+ });
413
+ return sortedFiles;
414
+ }
415
+
416
+ export function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
417
+ const sortedFiles = listPreviousResults();
418
+ for (let i = 0; i < sortedFiles.length - remaining; i++) {
419
+ fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', sortedFiles[i]));
420
+ }
421
+ }
422
+
423
+ export function readResult(
424
+ name: string,
425
+ ): { results: EvaluateSummary; config: Partial<UnifiedConfig> } | undefined {
426
+ const resultsDirectory = path.join(getConfigDirectoryPath(), 'output');
427
+ const resultsPath = path.join(resultsDirectory, name);
428
+ try {
429
+ const results = JSON.parse(fs.readFileSync(fs.realpathSync(resultsPath), 'utf-8'));
430
+ return results;
431
+ } catch (err) {
432
+ logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
385
433
  }
386
434
  }
387
435
 
@@ -389,12 +437,7 @@ export function readLatestResults():
389
437
  | { results: EvaluateSummary; config: Partial<UnifiedConfig> }
390
438
  | undefined {
391
439
  const latestResultsPath = getLatestResultsPath();
392
- try {
393
- const latestResults = JSON.parse(fs.readFileSync(latestResultsPath, 'utf-8'));
394
- return latestResults;
395
- } catch (err) {
396
- logger.error(`Failed to read latest results from ${latestResultsPath}:\n${err}`);
397
- }
440
+ return readResult(latestResultsPath);
398
441
  }
399
442
 
400
443
  export function cosineSimilarity(vecA: number[], vecB: number[]) {
@@ -14,6 +14,7 @@ function App() {
14
14
  const { table, setTable, setConfig } = useStore();
15
15
  const [loaded, setLoaded] = React.useState<boolean>(false);
16
16
  const loadedFromApi = React.useRef(false);
17
+ const [recentFiles, setRecentFiles] = React.useState<string[]>([]);
17
18
 
18
19
  const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
19
20
  const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
@@ -43,6 +44,22 @@ function App() {
43
44
  }
44
45
  }, [prefersDarkMode]);
45
46
 
47
+ const fetchRecentFiles = async () => {
48
+ if (!window.location.href.includes('localhost')) {
49
+ return;
50
+ }
51
+ const resp = await fetch(`http://localhost:15500/results`);
52
+ const body = await resp.json();
53
+ setRecentFiles(body.data);
54
+ };
55
+
56
+ const handleRecentFileSelection = async (file: string) => {
57
+ const resp = await fetch(`http://localhost:15500/results/${file}`);
58
+ const body = await resp.json();
59
+ setTable(body.data.results.table);
60
+ setConfig(body.data.config);
61
+ };
62
+
46
63
  React.useEffect(() => {
47
64
  const fetchEvalData = async (id: string) => {
48
65
  if (loadedFromApi.current) {
@@ -72,12 +89,14 @@ function App() {
72
89
  setLoaded(true);
73
90
  setTable(data.results.table);
74
91
  setConfig(data.config);
92
+ fetchRecentFiles();
75
93
  });
76
94
 
77
95
  socket.on('update', (data) => {
78
96
  console.log('Received data update', data);
79
97
  setTable(data.results.table);
80
98
  setConfig(data.config);
99
+ fetchRecentFiles();
81
100
  });
82
101
  }
83
102
 
@@ -89,7 +108,11 @@ function App() {
89
108
  return (
90
109
  <ThemeProvider theme={theme}>
91
110
  <NavBar darkMode={darkMode} onToggleDarkMode={toggleDarkMode} />
92
- {loaded && table ? <ResultsView /> : <div>Loading...</div>}
111
+ {loaded && table ? (
112
+ <ResultsView recentFiles={recentFiles} onRecentFileSelected={handleRecentFileSelection} />
113
+ ) : (
114
+ <div>Loading...</div>
115
+ )}
93
116
  </ThemeProvider>
94
117
  );
95
118
  }
@@ -37,7 +37,26 @@ const ResponsiveStack = styled(Stack)(({ theme }) => ({
37
37
  },
38
38
  }));
39
39
 
40
- export default function ResultsView() {
40
+ function filenameToDate(filename: string) {
41
+ const dateString = filename.slice('eval-'.length, filename.length - '.json'.length);
42
+ const date = new Date(dateString);
43
+ return date.toLocaleDateString('en-US', {
44
+ year: 'numeric',
45
+ month: 'long',
46
+ day: 'numeric',
47
+ hour: '2-digit',
48
+ minute: '2-digit',
49
+ second: '2-digit',
50
+ timeZoneName: 'short',
51
+ });
52
+ }
53
+
54
+ interface ResultsViewProps {
55
+ recentFiles: string[];
56
+ onRecentFileSelected: (file: string) => void;
57
+ }
58
+
59
+ export default function ResultsView({ recentFiles, onRecentFileSelected }: ResultsViewProps) {
41
60
  const { table, config } = useStore();
42
61
  const [maxTextLength, setMaxTextLength] = React.useState(250);
43
62
  const [columnVisibility, setColumnVisibility] = React.useState<VisibilityState>({});
@@ -148,10 +167,30 @@ export default function ResultsView() {
148
167
  return (
149
168
  <div>
150
169
  <Paper py="md">
151
- <ResponsiveStack direction="row" spacing={8} alignItems="center">
170
+ <ResponsiveStack direction="row" spacing={4} alignItems="center">
171
+ <Box>
172
+ {recentFiles && recentFiles.length > 0 && (
173
+ <FormControl sx={{ m: 1, minWidth: 200 }} size="small">
174
+ <InputLabel>View run</InputLabel>
175
+ <Select
176
+ key={recentFiles.join(',')}
177
+ className="recent-files"
178
+ label="Previous runs"
179
+ defaultValue={recentFiles[0]}
180
+ onChange={(e: SelectChangeEvent) => onRecentFileSelected(e.target.value)}
181
+ >
182
+ {recentFiles.map((file) => (
183
+ <MenuItem key={file} value={file}>
184
+ {filenameToDate(file)}
185
+ </MenuItem>
186
+ ))}
187
+ </Select>
188
+ </FormControl>
189
+ )}
190
+ </Box>
152
191
  <Box>
153
192
  <FormControl sx={{ m: 1, minWidth: 200 }} size="small">
154
- <InputLabel id="visible-columns-label">Visible columns</InputLabel>
193
+ <InputLabel id="visible-columns-label">Show columns</InputLabel>
155
194
  <Select
156
195
  labelId="visible-columns-label"
157
196
  id="visible-columns"
package/src/web/server.ts CHANGED
@@ -1,4 +1,4 @@
1
- import fs from 'fs';
1
+ import fs, { Stats } from 'fs';
2
2
  import path from 'node:path';
3
3
  import readline from 'node:readline';
4
4
  import http from 'node:http';
@@ -11,7 +11,7 @@ import { Server as SocketIOServer } from 'socket.io';
11
11
 
12
12
  import logger from '../logger';
13
13
  import { getDirectory } from '../esm';
14
- import { getLatestResultsPath } from '../util';
14
+ import { getLatestResultsPath, listPreviousResults, readResult } from '../util';
15
15
 
16
16
  export function init(port = 15500) {
17
17
  const app = express();
@@ -40,14 +40,37 @@ export function init(port = 15500) {
40
40
  socket.emit('init', readLatestJson());
41
41
 
42
42
  // Watch for changes to latest.json and emit the update event
43
- fs.watch(
44
- latestJsonPath,
45
- debounce((event: string) => {
46
- if (event === 'change') {
47
- socket.emit('update', readLatestJson());
48
- }
49
- }, 250),
50
- );
43
+ const watcher = debounce((curr: Stats, prev: Stats) => {
44
+ if (curr.mtime !== prev.mtime) {
45
+ socket.emit('update', readLatestJson());
46
+ }
47
+ }, 250);
48
+ fs.watchFile(latestJsonPath, watcher);
49
+
50
+ // Stop watching the file when the socket connection is closed
51
+ socket.on('disconnect', () => {
52
+ fs.unwatchFile(latestJsonPath, watcher);
53
+ });
54
+ });
55
+
56
+ app.get('/results', (req, res) => {
57
+ const previousResults = listPreviousResults();
58
+ res.json({ data: previousResults });
59
+ });
60
+
61
+ app.get('/results/:filename', (req, res) => {
62
+ const filename = req.params.filename;
63
+ const safeFilename = path.basename(filename);
64
+ if (safeFilename !== filename || !listPreviousResults().includes(safeFilename)) {
65
+ res.status(400).send('Invalid filename');
66
+ return;
67
+ }
68
+ const result = readResult(safeFilename);
69
+ if (!result) {
70
+ res.status(404).send('Result not found');
71
+ return;
72
+ }
73
+ res.json({ data: result });
51
74
  });
52
75
 
53
76
  httpServer.listen(port, () => {