promptfoo 0.17.7 → 0.17.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +2 -0
  2. package/dist/package.json +1 -1
  3. package/dist/src/evaluator.d.ts.map +1 -1
  4. package/dist/src/evaluator.js +31 -6
  5. package/dist/src/evaluator.js.map +1 -1
  6. package/dist/src/main.js +2 -0
  7. package/dist/src/main.js.map +1 -1
  8. package/dist/src/providers/azureopenai.d.ts +4 -0
  9. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  10. package/dist/src/providers/azureopenai.js +15 -0
  11. package/dist/src/providers/azureopenai.js.map +1 -1
  12. package/dist/src/providers/openai.d.ts +4 -0
  13. package/dist/src/providers/openai.d.ts.map +1 -1
  14. package/dist/src/providers/openai.js +21 -2
  15. package/dist/src/providers/openai.js.map +1 -1
  16. package/dist/src/providers/replicate.d.ts.map +1 -1
  17. package/dist/src/providers/replicate.js +2 -1
  18. package/dist/src/providers/replicate.js.map +1 -1
  19. package/dist/src/providers/shared.d.ts.map +1 -1
  20. package/dist/src/providers/shared.js.map +1 -1
  21. package/dist/src/types.d.ts +9 -2
  22. package/dist/src/types.d.ts.map +1 -1
  23. package/dist/src/util.d.ts +10 -3
  24. package/dist/src/util.d.ts.map +1 -1
  25. package/dist/src/util.js +125 -40
  26. package/dist/src/util.js.map +1 -1
  27. package/dist/src/web/client/assets/{index-13198388.js → index-8388d689.js} +25 -25
  28. package/dist/src/web/client/assets/{index-f9b230d1.css → index-d2b6a160.css} +1 -1
  29. package/dist/src/web/client/index.html +2 -2
  30. package/dist/src/web/server.d.ts.map +1 -1
  31. package/dist/src/web/server.js +26 -3
  32. package/dist/src/web/server.js.map +1 -1
  33. package/package.json +1 -1
  34. package/src/evaluator.ts +37 -6
  35. package/src/main.ts +3 -0
  36. package/src/providers/azureopenai.ts +24 -0
  37. package/src/providers/openai.ts +32 -3
  38. package/src/providers/replicate.ts +7 -3
  39. package/src/providers/shared.ts +3 -1
  40. package/src/types.ts +12 -2
  41. package/src/util.ts +140 -42
  42. package/src/web/client/src/App.tsx +24 -1
  43. package/src/web/client/src/ResultsTable.css +11 -1
  44. package/src/web/client/src/ResultsTable.tsx +10 -0
  45. package/src/web/client/src/ResultsView.tsx +48 -3
  46. package/src/web/client/src/types.ts +4 -0
  47. package/src/web/server.ts +33 -10
@@ -1 +1 @@
1
- :root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-actions{visibility:visible}tr .cell-actions .action{cursor:pointer}th 
.smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
1
+ :root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}th .action{cursor:pointer;margin-left:.5rem}tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr .cell-detail{visibility:hidden;position:absolute;bottom:.25rem;margin-top:1rem;font-size:.75rem;color:#888}tr:hover 
.cell-actions,tr:hover .cell-detail{visibility:visible}tr .cell-actions .action{cursor:pointer}th .smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .score{font-weight:400}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
@@ -5,8 +5,8 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-13198388.js"></script>
9
- <link rel="stylesheet" href="/assets/index-f9b230d1.css">
8
+ <script type="module" crossorigin src="/assets/index-8388d689.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-d2b6a160.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
@@ -1 +1 @@
1
- {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QA0DhC"}
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":"AAeA,wBAAgB,IAAI,CAAC,IAAI,SAAQ,QAiFhC"}
@@ -37,11 +37,34 @@ function init(port = 15500) {
37
37
  // Send the initial table data when a client connects
38
38
  socket.emit('init', readLatestJson());
39
39
  // Watch for changes to latest.json and emit the update event
40
- fs_1.default.watch(latestJsonPath, (0, debounce_1.default)((event) => {
41
- if (event === 'change') {
40
+ const watcher = (0, debounce_1.default)((curr, prev) => {
41
+ if (curr.mtime !== prev.mtime) {
42
42
  socket.emit('update', readLatestJson());
43
43
  }
44
- }, 250));
44
+ }, 250);
45
+ fs_1.default.watchFile(latestJsonPath, watcher);
46
+ // Stop watching the file when the socket connection is closed
47
+ socket.on('disconnect', () => {
48
+ fs_1.default.unwatchFile(latestJsonPath, watcher);
49
+ });
50
+ });
51
+ app.get('/results', (req, res) => {
52
+ const previousResults = (0, util_1.listPreviousResults)();
53
+ res.json({ data: previousResults });
54
+ });
55
+ app.get('/results/:filename', (req, res) => {
56
+ const filename = req.params.filename;
57
+ const safeFilename = node_path_1.default.basename(filename);
58
+ if (safeFilename !== filename || !(0, util_1.listPreviousResults)().includes(safeFilename)) {
59
+ res.status(400).send('Invalid filename');
60
+ return;
61
+ }
62
+ const result = (0, util_1.readResult)(safeFilename);
63
+ if (!result) {
64
+ res.status(404).send('Result not found');
65
+ return;
66
+ }
67
+ res.json({ data: result });
45
68
  });
46
69
  httpServer.listen(port, () => {
47
70
  const url = `http://localhost:${port}`;
@@ -1 +1 @@
1
- {"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAA+C;AAE/C,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,YAAE,CAAC,KAAK,CACN,cAAc,EACd,IAAA,kBAAQ,EAAC,CAAC,KAAa,EAAE,EAAE;YACzB,IAAI,KAAK,KAAK,QAAQ,EAAE;gBACtB,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAi
C,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AA1DD,oBA0DC"}
1
+ {"version":3,"file":"server.js","sourceRoot":"","sources":["../../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAA+B;AAC/B,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAErD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAAgF;AAEhF,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC;QAEtC,6DAA6D;QAC7D,MAAM,OAAO,GAAG,IAAA,kBAAQ,EAAC,CAAC,IAAW,EAAE,IAAW,EAAE,EAAE;YACpD,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,EAAE;gBAC7B,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,EAAE,CAAC,CAAC;aACzC;QACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACR,YAAE,CAAC,SAAS,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAEtC,8DAA8D;QAC9D,MAAM,CAAC,EAAE,CAAC,YAAY,EAAE,GAAG,EAAE;YAC3B,YAAE,CAAC,WAAW,CAAC,cAAc,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QAC/B,MAAM,eAAe,GAAG,IAAA,0BAAmB,GAAE,CAAC;QAC9C,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,eAAe,EAAE,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;IAEH,GAAG,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;QACzC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC;QACrC,MAAM,YAAY,GAAG,mBAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,YAAY,KAAK,QAAQ,IAAI,CAAC,IAAA,0BAAmB,GAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE;YAC9E,GAAG,CAAC,MAAM,CAAC
,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,MAAM,MAAM,GAAG,IAAA,iBAAU,EAAC,YAAY,CAAC,CAAC;QACxC,IAAI,CAAC,MAAM,EAAE;YACX,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;YACzC,OAAO;SACR;QACD,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAjFD,oBAiFC"}
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.17.7",
5
+ "version": "0.17.9",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/src/index.js",
package/src/evaluator.ts CHANGED
@@ -3,6 +3,7 @@ import readline from 'readline';
3
3
  import async from 'async';
4
4
  import chalk from 'chalk';
5
5
  import nunjucks from 'nunjucks';
6
+ import invariant from 'tiny-invariant';
6
7
 
7
8
  import logger from './logger';
8
9
  import telemetry from './telemetry';
@@ -110,18 +111,40 @@ class Evaluator {
110
111
  vars,
111
112
  };
112
113
 
114
+ let latencyMs = 0;
113
115
  try {
116
+ const startTime = Date.now();
114
117
  const response = await provider.callApi(renderedPrompt);
118
+ const endTime = Date.now();
119
+ latencyMs = endTime - startTime;
120
+
115
121
  const ret: EvaluateResult = {
116
122
  ...setup,
117
123
  response,
118
124
  success: false,
119
125
  score: 0,
126
+ latencyMs,
120
127
  };
121
128
  if (response.error) {
122
129
  ret.error = response.error;
123
130
  } else if (response.output) {
124
- const checkResult = await runAssertions(test, response.output);
131
+ // Create a copy of response so we can potentially mutate it.
132
+ let processedResponse = { ...response };
133
+ if (test.options?.postprocess) {
134
+ const { postprocess } = test.options;
135
+ const postprocessFn = new Function(
136
+ 'output',
137
+ 'context',
138
+ postprocess.includes('\n') ? postprocess : `return ${postprocess}`,
139
+ );
140
+ processedResponse.output = postprocessFn(processedResponse.output);
141
+ if (processedResponse.output == null) {
142
+ throw new Error('Postprocess function did not return a value');
143
+ }
144
+ }
145
+
146
+ invariant(processedResponse.output != null, 'Response output should not be null');
147
+ const checkResult = await runAssertions(test, processedResponse.output);
125
148
  if (!checkResult.pass) {
126
149
  ret.error = checkResult.reason;
127
150
  }
@@ -132,6 +155,7 @@ class Evaluator {
132
155
  this.stats.tokenUsage.prompt += checkResult.tokensUsed.prompt;
133
156
  this.stats.tokenUsage.completion += checkResult.tokensUsed.completion;
134
157
  }
158
+ ret.response = processedResponse;
135
159
  } else {
136
160
  ret.success = false;
137
161
  ret.score = 0;
@@ -159,6 +183,7 @@ class Evaluator {
159
183
  error: String(err) + '\n\n' + (err as Error).stack,
160
184
  success: false,
161
185
  score: 0,
186
+ latencyMs,
162
187
  };
163
188
  }
164
189
  }
@@ -232,11 +257,13 @@ class Evaluator {
232
257
  // Aggregate all vars across test cases
233
258
 
234
259
  const tests = (
235
- testSuite.tests || [
236
- {
237
- // Dummy test for cases when we're only comparing raw prompts.
238
- },
239
- ]
260
+ testSuite.tests && testSuite.tests.length > 0
261
+ ? testSuite.tests
262
+ : [
263
+ {
264
+ // Dummy test for cases when we're only comparing raw prompts.
265
+ },
266
+ ]
240
267
  ).map((test) => {
241
268
  const finalTestCase: TestCase = Object.assign({}, testSuite.defaultTest);
242
269
  return Object.assign(finalTestCase, test);
@@ -270,6 +297,8 @@ class Evaluator {
270
297
  testCase.options?.prefix || testSuite.defaultTest?.options?.prefix || '';
271
298
  const appendToPrompt =
272
299
  testCase.options?.suffix || testSuite.defaultTest?.options?.suffix || '';
300
+ testCase.options.postprocess =
301
+ testCase.options.postprocess || testSuite.defaultTest?.options?.postprocess;
273
302
 
274
303
  // Finalize test case eval
275
304
  const varCombinations = generateVarCombinations(testCase.vars || {});
@@ -404,6 +433,8 @@ class Evaluator {
404
433
  score: row.score,
405
434
  text: resultText,
406
435
  prompt: row.prompt.raw,
436
+ latencyMs: row.latencyMs,
437
+ tokenUsage: row.response?.tokenUsage,
407
438
  };
408
439
  },
409
440
  );
package/src/main.ts CHANGED
@@ -11,6 +11,7 @@ import logger, { getLogLevel, setLogLevel } from './logger';
11
11
  import { loadApiProvider, loadApiProviders } from './providers';
12
12
  import { evaluate } from './evaluator';
13
13
  import {
14
+ cleanupOldResults,
14
15
  maybeReadConfig,
15
16
  readConfig,
16
17
  readLatestResults,
@@ -181,6 +182,7 @@ async function main() {
181
182
  .action(async () => {
182
183
  telemetry.maybeShowNotice();
183
184
  await clearCache();
185
+ cleanupOldResults(0);
184
186
  telemetry.record('command_used', {
185
187
  name: 'cache_clear',
186
188
  });
@@ -321,6 +323,7 @@ async function main() {
321
323
  suffix: cmdObj.promptSuffix,
322
324
  provider: cmdObj.grader,
323
325
  // rubricPrompt:
326
+ // postprocess
324
327
  },
325
328
  ...config.defaultTest,
326
329
  };
@@ -6,6 +6,10 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
6
6
 
7
7
  interface AzureOpenAiCompletionOptions {
8
8
  temperature?: number;
9
+ top_p?: number;
10
+ frequency_penalty?: number;
11
+ presence_penalty?: number;
12
+ best_of?: number;
9
13
  functions?: {
10
14
  name: string;
11
15
  description?: string;
@@ -144,6 +148,17 @@ export class AzureOpenAiCompletionProvider extends AzureOpenAiGenericProvider {
144
148
  options?.temperature ??
145
149
  this.options.temperature ??
146
150
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
151
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
152
+ presence_penalty:
153
+ options?.presence_penalty ??
154
+ this.options.presence_penalty ??
155
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
156
+ frequency_penalty:
157
+ options?.frequency_penalty ??
158
+ this.options.frequency_penalty ??
159
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
160
+ best_of:
161
+ options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
147
162
  stop,
148
163
  };
149
164
  logger.debug(`Calling Azure OpenAI API: ${JSON.stringify(body)}`);
@@ -214,6 +229,15 @@ export class AzureOpenAiChatCompletionProvider extends AzureOpenAiGenericProvide
214
229
  options?.temperature ??
215
230
  this.options.temperature ??
216
231
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
232
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
233
+ presence_penalty:
234
+ options?.presence_penalty ??
235
+ this.options.presence_penalty ??
236
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
237
+ frequency_penalty:
238
+ options?.frequency_penalty ??
239
+ this.options.frequency_penalty ??
240
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
217
241
  functions: options?.functions || this.options.functions || undefined,
218
242
  function_call: options?.function_call || this.options.function_call || undefined,
219
243
  };
@@ -1,4 +1,3 @@
1
-
2
1
  import logger from '../logger';
3
2
  import { fetchJsonWithCache } from '../cache';
4
3
  import { REQUEST_TIMEOUT_MS, parseChatPrompt } from './shared';
@@ -10,6 +9,10 @@ const DEFAULT_OPENAI_HOST = 'api.openai.com';
10
9
  interface OpenAiCompletionOptions {
11
10
  temperature?: number;
12
11
  max_tokens?: number;
12
+ top_p?: number;
13
+ frequency_penalty?: number;
14
+ presence_penalty?: number;
15
+ best_of?: number;
13
16
  functions?: {
14
17
  name: string;
15
18
  description?: string;
@@ -148,11 +151,25 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
148
151
  const body = {
149
152
  model: this.modelName,
150
153
  prompt,
151
- max_tokens: options?.max_tokens ?? this.options.max_tokens ?? parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
154
+ max_tokens:
155
+ options?.max_tokens ??
156
+ this.options.max_tokens ??
157
+ parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
152
158
  temperature:
153
159
  options?.temperature ??
154
160
  this.options.temperature ??
155
161
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
162
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
163
+ presence_penalty:
164
+ options?.presence_penalty ??
165
+ this.options.presence_penalty ??
166
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
167
+ frequency_penalty:
168
+ options?.frequency_penalty ??
169
+ this.options.frequency_penalty ??
170
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
171
+ best_of:
172
+ options?.best_of ?? this.options.best_of ?? parseInt(process.env.OPENAI_BEST_OF || '1'),
156
173
  stop,
157
174
  };
158
175
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
@@ -231,11 +248,23 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
231
248
  const body = {
232
249
  model: this.modelName,
233
250
  messages: messages,
234
- max_tokens: options?.max_tokens ?? this.options.max_tokens ?? parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
251
+ max_tokens:
252
+ options?.max_tokens ??
253
+ this.options.max_tokens ??
254
+ parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
235
255
  temperature:
236
256
  options?.temperature ??
237
257
  this.options.temperature ??
238
258
  parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
259
+ top_p: options?.top_p ?? this.options.top_p ?? parseFloat(process.env.OPENAI_TOP_P || '1'),
260
+ presence_penalty:
261
+ options?.presence_penalty ??
262
+ this.options.presence_penalty ??
263
+ parseFloat(process.env.OPENAI_PRESENCE_PENALTY || '0'),
264
+ frequency_penalty:
265
+ options?.frequency_penalty ??
266
+ this.options.frequency_penalty ??
267
+ parseFloat(process.env.OPENAI_FREQUENCY_PENALTY || '0'),
239
268
  functions: options?.functions || this.options.functions || undefined,
240
269
  function_call: options?.function_call || this.options.function_call || undefined,
241
270
  };
@@ -65,9 +65,13 @@ export class ReplicateProvider implements ApiProvider {
65
65
  const data = {
66
66
  input: {
67
67
  prompt,
68
- max_length: this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
69
- temperature: this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
70
- repetition_penalty: this.options.repetition_penalty || parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
68
+ max_length:
69
+ this.options.max_length || parseInt(process.env.REPLICATE_MAX_LENGTH || '2046', 10),
70
+ temperature:
71
+ this.options.temperature || parseFloat(process.env.REPLICATE_TEMPERATURE || '0.01'),
72
+ repetition_penalty:
73
+ this.options.repetition_penalty ||
74
+ parseFloat(process.env.REPLICATE_REPETITION_PENALTY || '1.0'),
71
75
  },
72
76
  };
73
77
  response = await replicate.run(this.modelName as any, data);
@@ -4,7 +4,9 @@ export const REQUEST_TIMEOUT_MS = process.env.REQUEST_TIMEOUT_MS
4
4
  ? parseInt(process.env.REQUEST_TIMEOUT_MS, 10)
5
5
  : 300_000;
6
6
 
7
- export function parseChatPrompt(prompt: string): { role: string; content: string; name?: string }[] {
7
+ export function parseChatPrompt(
8
+ prompt: string,
9
+ ): { role: string; content: string; name?: string }[] {
8
10
  const trimmedPrompt = prompt.trim();
9
11
  if (trimmedPrompt.startsWith('- role:')) {
10
12
  try {
package/src/types.ts CHANGED
@@ -73,6 +73,10 @@ export interface PromptConfig {
73
73
  suffix?: string;
74
74
  }
75
75
 
76
+ export interface OutputConfig {
77
+ postprocess?: string;
78
+ }
79
+
76
80
  export interface EvaluateOptions {
77
81
  maxConcurrency?: number;
78
82
  showProgressBar?: boolean;
@@ -92,6 +96,7 @@ export interface EvaluateResult {
92
96
  error?: string;
93
97
  success: boolean;
94
98
  score: number;
99
+ latencyMs: number;
95
100
  }
96
101
 
97
102
  export interface EvaluateTableOutput {
@@ -99,6 +104,8 @@ export interface EvaluateTableOutput {
99
104
  score: number;
100
105
  text: string;
101
106
  prompt: string;
107
+ latencyMs: number;
108
+ tokenUsage?: Partial<TokenUsage>;
102
109
  }
103
110
 
104
111
  export interface EvaluateTable {
@@ -181,11 +188,14 @@ export interface TestCase {
181
188
  // Key-value pairs to substitute in the prompt
182
189
  vars?: Record<string, string | string[] | object>;
183
190
 
191
+ // Optional filepath or glob pattern to load vars from
192
+ loadVars?: string | string[];
193
+
184
194
  // Optional list of automatic checks to run on the LLM output
185
195
  assert?: Assertion[];
186
196
 
187
197
  // Additional configuration settings for the prompt
188
- options?: PromptConfig & GradingConfig;
198
+ options?: PromptConfig & OutputConfig & GradingConfig;
189
199
  }
190
200
 
191
201
  // Same as a TestCase, except the `vars` object has been flattened into its final form.
@@ -231,7 +241,7 @@ export interface TestSuiteConfig {
231
241
  prompts: string | string[];
232
242
 
233
243
  // Path to a test file, OR list of LLM prompt variations (aka "test case")
234
- tests: string | TestCase[];
244
+ tests: string | string[] | TestCase[];
235
245
 
236
246
  // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
237
247
  defaultTest?: Omit<TestCase, 'description'>;